LoongArch: Accelerate optimization of scalar signed/unsigned popcount.

On LoongArch, vector popcount has dedicated instructions, while scalar
popcount does not.  Currently, scalar popcount is computed with a loop,
and any value that is not a power of two takes several iterations, so
the vector popcount instruction is used to implement the scalar
operation instead.

gcc/ChangeLog:

	* config/loongarch/loongarch.md (cntmap): New mode attribute, used to
	simplify the following template.
	(popcount<mode>2): New.

gcc/testsuite/ChangeLog:

	* gcc.target/loongarch/popcnt.c: New test.
	* gcc.target/loongarch/popcount.c: New test.
This commit is contained in:
Li Wei 2023-11-28 15:38:37 +08:00 committed by Lulu Cheng
parent ccc7702757
commit a68ae55883
3 changed files with 83 additions and 2 deletions

View File

@ -1512,7 +1512,30 @@
(set_attr "cnv_mode" "D2S")
(set_attr "mode" "SF")])
;; In vector registers, popcount can be implemented directly through
;; the vector instruction [X]VPCNT. For GP registers, we can implement
;; it through the following method. Compared with loop implementation
;; of popcount, the following method has better performance.
;; This attribute maps each scalar mode to the name of its corresponding
;; vector mode (SI -> v4si, DI -> v2di).  The popcount<mode>2 expander
;; uses it to pick the vector popcount generator, gen_popcount<cntmap>2.
(define_mode_attr cntmap [(SI "v4si") (DI "v2di")])
;; Expand scalar popcount for the GPR modes; only enabled under
;; ISA_HAS_LSX.  Operand 1 is the scalar input, operand 0 the scalar
;; result.  The expansion emits three insns:
;;   1. lsx_vinsgr2vr_<size>: insert the scalar input into a newly
;;      allocated vector pseudo (V4SI when the mode is SImode, V2DI
;;      otherwise).
;;      NOTE(review): GEN_INT (1) is presumably a one-hot element mask
;;      selecting element 0 -- confirm against the lsx_vinsgr2vr pattern.
;;   2. popcount<cntmap>2: vector popcount with the pseudo as both
;;      source and destination.
;;   3. lsx_vpickve2gr_<size>: copy element 0 of the pseudo into the
;;      scalar result.
;; NOTE(review): the vector pseudo is passed as an input to step 1
;; straight after gen_reg_rtx, with no initialization visible here; the
;; remaining elements are not read back by step 3, but confirm that using
;; an uninitialized pseudo as a source is acceptable.
(define_expand "popcount<mode>2"
[(set (match_operand:GPR 0 "register_operand")
(popcount:GPR (match_operand:GPR 1 "register_operand")))]
"ISA_HAS_LSX"
{
rtx in = operands[1];
rtx out = operands[0];
rtx vreg = <MODE>mode == SImode ? gen_reg_rtx (V4SImode) :
gen_reg_rtx (V2DImode);
emit_insn (gen_lsx_vinsgr2vr_<size> (vreg, in, vreg, GEN_INT (1)));
emit_insn (gen_popcount<cntmap>2 (vreg, vreg));
emit_insn (gen_lsx_vpickve2gr_<size> (out, vreg, GEN_INT (0)));
DONE;
})
;;
;; ....................
;;
@ -3879,7 +3902,7 @@
(any_extend:SI (match_dup 3)))])]
"")
(define_mode_iterator QHSD [QI HI SI DI])

View File

@ -0,0 +1,41 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mlsx" } */
/* { dg-final { scan-assembler-not {popcount} } } */
/* { dg-final { scan-assembler-times "vpcnt.d" 2 { target { loongarch64*-*-* } } } } */
/* { dg-final { scan-assembler-times "vpcnt.w" 4 { target { loongarch64*-*-* } } } } */
/* Return the number of set bits in X, using the int-width bit-count
   builtin; both argument and result are int.  */
int
foo (int x)
{
  int bits = __builtin_popcount (x);

  return bits;
}
/* Return the number of set bits in X, using the long-width bit-count
   builtin; the builtin's int result is widened to long.  */
long
foo1 (long x)
{
  long bits = __builtin_popcountl (x);

  return bits;
}
/* Return the number of set bits in X, using the long-long-width
   bit-count builtin; the builtin's int result is widened to long long.  */
long long
foo2 (long long x)
{
  long long bits = __builtin_popcountll (x);

  return bits;
}
/* Load the int that P points to and return its number of set bits.
   P must point to a readable int.  */
int
foo3 (int *p)
{
  int value = *p;

  return __builtin_popcount (value);
}
/* Return the number of set bits in X as an unsigned value; the
   builtin's int result is converted to unsigned on return.  */
unsigned
foo4 (int x)
{
  unsigned bits = __builtin_popcount (x);

  return bits;
}
/* Return the number of set bits in the int X as an unsigned long; the
   builtin's int result is widened to unsigned long on return.  */
unsigned long
foo5 (int x)
{
  unsigned long bits = __builtin_popcount (x);

  return bits;
}

View File

@ -0,0 +1,17 @@
/* { dg-do compile } */
/* { dg-options "-O2 -mlsx -fdump-tree-optimized" } */
/* { dg-final { scan-tree-dump-times "__builtin_popcount|\\.POPCOUNT" 1 "optimized" } } */
/* Count the set bits in B with a hand-written loop.  The source form of
   this loop is the test input: the scan-tree-dump-times directive above
   requires exactly one __builtin_popcount or .POPCOUNT in the
   "optimized" dump, so keep the loop in this form.  */
int
PopCount (long b)
{
int c = 0;
/* One iteration per set bit: each pass removes the lowest set bit.  */
while (b)
{
/* b & (b - 1) clears the lowest set bit of b.  */
b &= b - 1;
c++;
}
return c;
}