mirror of
https://gcc.gnu.org/git/gcc.git
synced 2024-12-20 09:35:05 +08:00
i386: Introduce V2QImode minmax, abs and uavgv2qi3_ceil [PR103861]
Add V2QImode minmax, abs and uavgv2qi3_ceil operations with SSE registers. 2022-01-05 Uroš Bizjak <ubizjak@gmail.com> gcc/ChangeLog: PR target/103861 * config/i386/mmx.md (VI_16_32): New mode iterator. (VI1_16_32): Ditto. (mmxvecsize): Handle V2QI mode. (<smaxmin:code><mode>3): Rename from <smaxmin:code>v4qi3. Use VI1_16_32 mode iterator. (<umaxmin:code><mode>3): Rename from <umaxmin:code>v4qi3. Use VI1_16_32 mode iterator. (abs<mode>2): Use VI_16_32 mode iterator. (uavgv2qi3_ceil): New insn pattern. gcc/testsuite/ChangeLog: PR target/103861 * gcc.target/i386/pr103861-3.c: New test. * g++.dg/vect/slp-pr98855.cc (dg-final): Check that no vectorization using SLP was performed.
This commit is contained in:
parent
e3ef832a9e
commit
c166632bd2
@ -63,6 +63,12 @@
|
||||
;; 4-byte integer vector modes
|
||||
(define_mode_iterator VI_32 [V4QI V2HI])
|
||||
|
||||
;; 4-byte and 2-byte integer vector modes
|
||||
(define_mode_iterator VI_16_32 [V4QI V2QI V2HI])
|
||||
|
||||
;; 4-byte and 2-byte QImode vector modes
|
||||
(define_mode_iterator VI1_16_32 [V4QI V2QI])
|
||||
|
||||
;; V2S* modes
|
||||
(define_mode_iterator V2FI [V2SF V2SI])
|
||||
|
||||
@ -71,7 +77,8 @@
|
||||
|
||||
;; Mapping from integer vector mode to mnemonic suffix
|
||||
(define_mode_attr mmxvecsize
|
||||
[(V8QI "b") (V4QI "b") (V4HI "w") (V2HI "w") (V2SI "d") (V1DI "q")])
|
||||
[(V8QI "b") (V4QI "b") (V2QI "b")
|
||||
(V4HI "w") (V2HI "w") (V2SI "d") (V1DI "q")])
|
||||
|
||||
(define_mode_attr mmxdoublemode
|
||||
[(V8QI "V8HI") (V4HI "V4SI")])
|
||||
@ -2140,11 +2147,11 @@
|
||||
(match_operand:V4HI 2 "register_operand")))]
|
||||
"TARGET_MMX_WITH_SSE")
|
||||
|
||||
(define_insn "<code>v4qi3"
|
||||
[(set (match_operand:V4QI 0 "register_operand" "=Yr,*x,Yv")
|
||||
(smaxmin:V4QI
|
||||
(match_operand:V4QI 1 "register_operand" "%0,0,Yv")
|
||||
(match_operand:V4QI 2 "register_operand" "Yr,*x,Yv")))]
|
||||
(define_insn "<code><mode>3"
|
||||
[(set (match_operand:VI1_16_32 0 "register_operand" "=Yr,*x,Yv")
|
||||
(smaxmin:VI1_16_32
|
||||
(match_operand:VI1_16_32 1 "register_operand" "%0,0,Yv")
|
||||
(match_operand:VI1_16_32 2 "register_operand" "Yr,*x,Yv")))]
|
||||
"TARGET_SSE4_1"
|
||||
"@
|
||||
p<maxmin_int>b\t{%2, %0|%0, %2}
|
||||
@ -2218,11 +2225,11 @@
|
||||
(match_operand:V8QI 2 "register_operand")))]
|
||||
"TARGET_MMX_WITH_SSE")
|
||||
|
||||
(define_insn "<code>v4qi3"
|
||||
[(set (match_operand:V4QI 0 "register_operand" "=x,Yw")
|
||||
(umaxmin:V4QI
|
||||
(match_operand:V4QI 1 "register_operand" "%0,Yw")
|
||||
(match_operand:V4QI 2 "register_operand" "x,Yw")))]
|
||||
(define_insn "<code><mode>3"
|
||||
[(set (match_operand:VI1_16_32 0 "register_operand" "=x,Yw")
|
||||
(umaxmin:VI1_16_32
|
||||
(match_operand:VI1_16_32 1 "register_operand" "%0,Yw")
|
||||
(match_operand:VI1_16_32 2 "register_operand" "x,Yw")))]
|
||||
"TARGET_SSE2"
|
||||
"@
|
||||
p<maxmin_int>b\t{%2, %0|%0, %2}
|
||||
@ -2269,9 +2276,9 @@
|
||||
"TARGET_SSSE3 && TARGET_MMX_WITH_SSE")
|
||||
|
||||
(define_insn "abs<mode>2"
|
||||
[(set (match_operand:VI_32 0 "register_operand" "=Yv")
|
||||
(abs:VI_32
|
||||
(match_operand:VI_32 1 "register_operand" "Yv")))]
|
||||
[(set (match_operand:VI_16_32 0 "register_operand" "=Yv")
|
||||
(abs:VI_16_32
|
||||
(match_operand:VI_16_32 1 "register_operand" "Yv")))]
|
||||
"TARGET_SSSE3"
|
||||
"%vpabs<mmxvecsize>\t{%1, %0|%0, %1}"
|
||||
[(set_attr "type" "sselog1")
|
||||
@ -4351,6 +4358,26 @@
|
||||
(set_attr "type" "sseiadd")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_insn "uavgv2qi3_ceil"
|
||||
[(set (match_operand:V2QI 0 "register_operand" "=x,Yw")
|
||||
(truncate:V2QI
|
||||
(lshiftrt:V2HI
|
||||
(plus:V2HI
|
||||
(plus:V2HI
|
||||
(zero_extend:V2HI
|
||||
(match_operand:V2QI 1 "register_operand" "%0,Yw"))
|
||||
(zero_extend:V2HI
|
||||
(match_operand:V2QI 2 "register_operand" "x,Yw")))
|
||||
(const_vector:V2HI [(const_int 1) (const_int 1)]))
|
||||
(const_int 1))))]
|
||||
"TARGET_SSE2"
|
||||
"@
|
||||
pavgb\t{%2, %0|%0, %2}
|
||||
vpavgb\t{%2, %1, %0|%0, %1, %2}"
|
||||
[(set_attr "isa" "noavx,avx")
|
||||
(set_attr "type" "sseiadd")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_insn "uavgv2hi3_ceil"
|
||||
[(set (match_operand:V2HI 0 "register_operand" "=x,Yw")
|
||||
(truncate:V2HI
|
||||
|
@ -81,6 +81,5 @@ void encrypt_n(const uint8_t in[], uint8_t out[], size_t blocks, uint32_t *EK)
|
||||
}
|
||||
}
|
||||
|
||||
// This used to work on { target x86_64-*-* i?86-*-* } but a fix in SLP
|
||||
// discovery makes us trip over the threshold again.
|
||||
// { dg-final { scan-tree-dump-times "not vectorized: vectorization is not profitable" 2 "slp1" { xfail *-*-* } } }
|
||||
// { dg-final { scan-tree-dump "not vectorized: vectorization is not profitable" "slp1" } }
|
||||
// { dg-final { scan-tree-dump-not "vectorizing stmts using SLP" "slp1" } }
|
||||
|
66
gcc/testsuite/gcc.target/i386/pr103861-3.c
Normal file
66
gcc/testsuite/gcc.target/i386/pr103861-3.c
Normal file
@ -0,0 +1,66 @@
|
||||
/* PR target/103861 */
|
||||
/* { dg-do compile } */
|
||||
/* { dg-options "-O2 -ftree-vectorize -msse4" } */
|
||||
|
||||
char r[2], a[2], b[2];
|
||||
unsigned char ur[2], ua[2], ub[2];
|
||||
|
||||
void maxs (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
r[i] = a[i] > b[i] ? a[i] : b[i];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "pmaxsb" } } */
|
||||
|
||||
void maxu (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
ur[i] = ua[i] > ub[i] ? ua[i] : ub[i];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "pmaxub" } } */
|
||||
|
||||
void mins (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
r[i] = a[i] < b[i] ? a[i] : b[i];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "pminsb" } } */
|
||||
|
||||
void minu (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
ur[i] = ua[i] < ub[i] ? ua[i] : ub[i];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "pminub" } } */
|
||||
|
||||
void _abs (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
r[i] = a[i] < 0 ? -a[i] : a[i];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "pabsb" } } */
|
||||
|
||||
void avgu (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
for (i = 0; i < 2; i++)
|
||||
ur[i] = (ua[i] + ub[i] + 1) >> 1;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler "pavgb" { xfail *-*-* } } } */
|
Loading…
Reference in New Issue
Block a user