mirror of
https://gcc.gnu.org/git/gcc.git
synced 2024-12-18 08:23:50 +08:00
middle-end: Support complex Addition
This patch adds support for * Complex Addition with rotation of 90 and 270. Addition with rotation of the second argument around the Argand plane. Supported rotations are 90 and 180. c = a + (b * I) and c = a + (b * I * I * I) gcc/ChangeLog: * tree-vect-slp-patterns.c: New file. * Makefile.in: Add it. * doc/passes.texi: Document it. * internal-fn.def (COMPLEX_ADD_ROT90, COMPLEX_ADD_ROT270): New. * optabs.def (cadd90_optab, cadd270_optab): New. * doc/md.texi: Document them. * tree-vect-loop.c (vect_analyze_loop_2): Add dissolve code. * tree-vect-slp.c: (vect_free_slp_instance, vect_create_new_slp_node): Export. (vect_match_slp_patterns_2, vect_match_slp_patterns): New. (vect_analyze_slp): Use it. * tree-vectorizer.h (vect_free_slp_tree): Export. (enum _complex_operation): Forward declare. (class vect_pattern): New gcc/testsuite/ChangeLog: * lib/target-supports.exp (check_effective_target_arm_v8_3a_complex_neon_ok_nocache): Fix it. (check_effective_target_vect_complex_add_byte ,check_effective_target_vect_complex_add_int ,check_effective_target_vect_complex_add_short ,check_effective_target_vect_complex_add_long ,check_effective_target_vect_complex_add_half ,check_effective_target_vect_complex_add_float ,check_effective_target_vect_complex_add_double): New. * gcc.dg/vect/complex/bb-slp-complex-add-pattern-byte.c: New test. * gcc.dg/vect/complex/bb-slp-complex-add-pattern-int.c: New test. * gcc.dg/vect/complex/bb-slp-complex-add-pattern-long.c: New test. * gcc.dg/vect/complex/bb-slp-complex-add-pattern-short.c: New test. * gcc.dg/vect/complex/bb-slp-complex-add-pattern-unsigned-byte.c: New test. * gcc.dg/vect/complex/bb-slp-complex-add-pattern-unsigned-int.c: New test. * gcc.dg/vect/complex/bb-slp-complex-add-pattern-unsigned-long.c: New test. * gcc.dg/vect/complex/bb-slp-complex-add-pattern-unsigned-short.c: New test. * gcc.dg/vect/complex/complex-add-pattern-template.c: New test. * gcc.dg/vect/complex/complex-add-template.c: New test. * gcc.dg/vect/complex/complex-operations-run.c: New test. * gcc.dg/vect/complex/complex-operations.c: New test. * gcc.dg/vect/complex/complex.exp: New test. * gcc.dg/vect/complex/fast-math-bb-slp-complex-add-double.c: New test. * gcc.dg/vect/complex/fast-math-bb-slp-complex-add-float.c: New test. * gcc.dg/vect/complex/fast-math-bb-slp-complex-add-half-float.c: New test. * gcc.dg/vect/complex/fast-math-bb-slp-complex-add-pattern-double.c: New test. * gcc.dg/vect/complex/fast-math-bb-slp-complex-add-pattern-float.c: New test. * gcc.dg/vect/complex/fast-math-bb-slp-complex-add-pattern-half-float.c: New test. * gcc.dg/vect/complex/fast-math-complex-add-double.c: New test. * gcc.dg/vect/complex/fast-math-complex-add-float.c: New test. * gcc.dg/vect/complex/fast-math-complex-add-half-float.c: New test. * gcc.dg/vect/complex/fast-math-complex-add-pattern-double.c: New test. * gcc.dg/vect/complex/fast-math-complex-add-pattern-float.c: New test. * gcc.dg/vect/complex/fast-math-complex-add-pattern-half-float.c: New test. * gcc.dg/vect/complex/vect-complex-add-pattern-byte.c: New test. * gcc.dg/vect/complex/vect-complex-add-pattern-int.c: New test. * gcc.dg/vect/complex/vect-complex-add-pattern-long.c: New test. * gcc.dg/vect/complex/vect-complex-add-pattern-short.c: New test. * gcc.dg/vect/complex/vect-complex-add-pattern-unsigned-byte.c: New test. * gcc.dg/vect/complex/vect-complex-add-pattern-unsigned-int.c: New test. * gcc.dg/vect/complex/vect-complex-add-pattern-unsigned-long.c: New test. * gcc.dg/vect/complex/vect-complex-add-pattern-unsigned-short.c: New test.
This commit is contained in:
parent
10bbba9145
commit
3ed472af6b
@ -1647,6 +1647,7 @@ OBJS = \
|
||||
tree-vect-loop.o \
|
||||
tree-vect-loop-manip.o \
|
||||
tree-vect-slp.o \
|
||||
tree-vect-slp-patterns.o \
|
||||
tree-vectorizer.o \
|
||||
tree-vector-builder.o \
|
||||
tree-vrp.o \
|
||||
|
@ -6154,6 +6154,54 @@ floating-point mode.
|
||||
|
||||
This pattern is not allowed to @code{FAIL}.
|
||||
|
||||
@cindex @code{cadd90@var{m}3} instruction pattern
|
||||
@item @samp{cadd90@var{m}3}
|
||||
Perform vector add and subtract on even/odd number pairs. The operation being
|
||||
matched is semantically described as
|
||||
|
||||
@smallexample
|
||||
for (int i = 0; i < N; i += 2)
|
||||
@{
|
||||
c[i] = a[i] - b[i+1];
|
||||
c[i+1] = a[i+1] + b[i];
|
||||
@}
|
||||
@end smallexample
|
||||
|
||||
This operation is semantically equivalent to performing a vector addition of
|
||||
complex numbers in operand 1 with operand 2 rotated by 90 degrees around
|
||||
the argand plane and storing the result in operand 0.
|
||||
|
||||
In GCC lane ordering the real part of the number must be in the even lanes with
|
||||
the imaginary part in the odd lanes.
|
||||
|
||||
The operation is only supported for vector modes @var{m}.
|
||||
|
||||
This pattern is not allowed to @code{FAIL}.
|
||||
|
||||
@cindex @code{cadd270@var{m}3} instruction pattern
|
||||
@item @samp{cadd270@var{m}3}
|
||||
Perform vector add and subtract on even/odd number pairs. The operation being
|
||||
matched is semantically described as
|
||||
|
||||
@smallexample
|
||||
for (int i = 0; i < N; i += 2)
|
||||
@{
|
||||
c[i] = a[i] + b[i+1];
|
||||
c[i+1] = a[i+1] - b[i];
|
||||
@}
|
||||
@end smallexample
|
||||
|
||||
This operation is semantically equivalent to performing a vector addition of
|
||||
complex numbers in operand 1 with operand 2 rotated by 270 degrees around
|
||||
the argand plane and storing the result in operand 0.
|
||||
|
||||
In GCC lane ordering the real part of the number must be in the even lanes with
|
||||
the imaginary part in the odd lanes.
|
||||
|
||||
The operation is only supported for vector modes @var{m}.
|
||||
|
||||
This pattern is not allowed to @code{FAIL}.
|
||||
|
||||
@cindex @code{ffs@var{m}2} instruction pattern
|
||||
@item @samp{ffs@var{m}2}
|
||||
Store into operand 0 one plus the index of the least significant 1-bit
|
||||
|
@ -709,7 +709,8 @@ loop.
|
||||
The pass is implemented in @file{tree-vectorizer.c} (the main driver),
|
||||
@file{tree-vect-loop.c} and @file{tree-vect-loop-manip.c} (loop specific parts
|
||||
and general loop utilities), @file{tree-vect-slp} (loop-aware SLP
|
||||
functionality), @file{tree-vect-stmts.c} and @file{tree-vect-data-refs.c}.
|
||||
functionality), @file{tree-vect-stmts.c}, @file{tree-vect-data-refs.c} and
|
||||
@file{tree-vect-slp-patterns.c} containing the SLP pattern matcher.
|
||||
Analysis of data references is in @file{tree-data-ref.c}.
|
||||
|
||||
SLP Vectorization. This pass performs vectorization of straight-line code. The
|
||||
|
@ -277,6 +277,9 @@ DEF_INTERNAL_FLT_FN (SCALB, ECF_CONST, scalb, binary)
|
||||
DEF_INTERNAL_FLT_FLOATN_FN (FMIN, ECF_CONST, fmin, binary)
|
||||
DEF_INTERNAL_FLT_FLOATN_FN (FMAX, ECF_CONST, fmax, binary)
|
||||
DEF_INTERNAL_OPTAB_FN (XORSIGN, ECF_CONST, xorsign, binary)
|
||||
DEF_INTERNAL_OPTAB_FN (COMPLEX_ADD_ROT90, ECF_CONST, cadd90, binary)
|
||||
DEF_INTERNAL_OPTAB_FN (COMPLEX_ADD_ROT270, ECF_CONST, cadd270, binary)
|
||||
|
||||
|
||||
/* FP scales. */
|
||||
DEF_INTERNAL_FLT_FN (LDEXP, ECF_CONST, ldexp, binary)
|
||||
|
@ -290,6 +290,8 @@ OPTAB_D (atan_optab, "atan$a2")
|
||||
OPTAB_D (atanh_optab, "atanh$a2")
|
||||
OPTAB_D (copysign_optab, "copysign$F$a3")
|
||||
OPTAB_D (xorsign_optab, "xorsign$F$a3")
|
||||
OPTAB_D (cadd90_optab, "cadd90$a3")
|
||||
OPTAB_D (cadd270_optab, "cadd270$a3")
|
||||
OPTAB_D (cos_optab, "cos$a2")
|
||||
OPTAB_D (cosh_optab, "cosh$a2")
|
||||
OPTAB_D (exp10_optab, "exp10$a2")
|
||||
|
@ -0,0 +1,12 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_byte } */
|
||||
/* { dg-require-effective-target stdint_types } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE int8_t
|
||||
#define N 16
|
||||
#include <stdint.h>
|
||||
#include "complex-add-pattern-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "slp1" { xfail aarch64_sve2 } } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { xfail aarch64_sve2 } } } */
|
@ -0,0 +1,12 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_int } */
|
||||
/* { dg-require-effective-target stdint_types } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE int32_t
|
||||
#define N 16
|
||||
#include <stdint.h>
|
||||
#include "complex-add-pattern-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "slp1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { xfail aarch64_sve2 } } } */
|
@ -0,0 +1,12 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_long } */
|
||||
/* { dg-require-effective-target stdint_types } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE int64_t
|
||||
#define N 16
|
||||
#include <stdint.h>
|
||||
#include "complex-add-pattern-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "slp1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { xfail aarch64_sve2 } } } */
|
@ -0,0 +1,12 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_short } */
|
||||
/* { dg-require-effective-target stdint_types } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE int16_t
|
||||
#define N 16
|
||||
#include <stdint.h>
|
||||
#include "complex-add-pattern-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "slp1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { xfail aarch64_sve2 } } } */
|
@ -0,0 +1,12 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_byte } */
|
||||
/* { dg-require-effective-target stdint_types } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE uint8_t
|
||||
#define N 16
|
||||
#include <stdint.h>
|
||||
#include "complex-add-pattern-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "slp1" { xfail aarch64_sve2 } } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { xfail aarch64_sve2 } } } */
|
@ -0,0 +1,12 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_int } */
|
||||
/* { dg-require-effective-target stdint_types } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE uint32_t
|
||||
#define N 16
|
||||
#include <stdint.h>
|
||||
#include "complex-add-pattern-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "slp1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { xfail aarch64_sve2 } } } */
|
@ -0,0 +1,12 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_long } */
|
||||
/* { dg-require-effective-target stdint_types } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE uint64_t
|
||||
#define N 16
|
||||
#include <stdint.h>
|
||||
#include "complex-add-pattern-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "slp1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { xfail aarch64_sve2 } } } */
|
@ -0,0 +1,12 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_short } */
|
||||
/* { dg-require-effective-target stdint_types } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE uint16_t
|
||||
#define N 16
|
||||
#include <stdint.h>
|
||||
#include "complex-add-pattern-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "slp1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { xfail aarch64_sve2 } } } */
|
@ -0,0 +1,60 @@
|
||||
void add90 (TYPE a[restrict N], TYPE b[restrict N], TYPE c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i+=2)
|
||||
{
|
||||
c[i] = a[i] - b[i+1];
|
||||
c[i+1] = a[i+1] + b[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "vect" } } */
|
||||
|
||||
void add270 (TYPE a[restrict N], TYPE b[restrict N], TYPE c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i+=2)
|
||||
{
|
||||
c[i] = a[i] + b[i+1];
|
||||
c[i+1] = a[i+1] - b[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "vect" } } */
|
||||
|
||||
void addMixed (TYPE a[restrict N], TYPE b[restrict N], TYPE c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i+=4)
|
||||
{
|
||||
c[i] = a[i] - b[i+1];
|
||||
c[i+1] = a[i+1] + b[i];
|
||||
c[i+2] = a[i+2] + b[i+3];
|
||||
c[i+3] = a[i+3] - b[i+2];
|
||||
}
|
||||
}
|
||||
|
||||
void add90HandUnrolled (TYPE a[restrict N], TYPE b[restrict N],
|
||||
TYPE c[restrict N])
|
||||
{
|
||||
for (int i=0; i < (N /2); i+=4)
|
||||
{
|
||||
c[i] = a[i] - b[i+1];
|
||||
c[i+2] = a[i+2] - b[i+3];
|
||||
c[i+1] = a[i+1] + b[i];
|
||||
c[i+3] = a[i+3] + b[i+2];
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "vect" } } */
|
||||
|
||||
void add90Hybrid (TYPE a[restrict N], TYPE b[restrict N], TYPE c[restrict N],
|
||||
TYPE d[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i+=2)
|
||||
{
|
||||
c[i] = a[i] - b[i+1];
|
||||
c[i+1] = a[i+1] + b[i];
|
||||
d[i] = a[i] - b[i];
|
||||
d[i+1] = a[i+1] - b[i+1];
|
||||
}
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 2 "vect" } } */
|
79
gcc/testsuite/gcc.dg/vect/complex/complex-add-template.c
Normal file
79
gcc/testsuite/gcc.dg/vect/complex/complex-add-template.c
Normal file
@ -0,0 +1,79 @@
|
||||
#include <complex.h>
|
||||
|
||||
void add0 (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
|
||||
_Complex TYPE c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = a[i] + b[i];
|
||||
}
|
||||
|
||||
void add90snd (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
|
||||
_Complex TYPE c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = a[i] + (b[i] * I);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "vect" } } */
|
||||
|
||||
void add180snd (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
|
||||
_Complex TYPE c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = a[i] + (b[i] * I * I);
|
||||
}
|
||||
|
||||
void add270snd (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
|
||||
_Complex TYPE c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = a[i] + (b[i] * I * I * I);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "vect" } } */
|
||||
|
||||
void add90fst (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
|
||||
_Complex TYPE c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = (a[i] * I) + b[i];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "vect" } } */
|
||||
|
||||
void add180fst (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
|
||||
_Complex TYPE c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = (a[i] * I * I) + b[i];
|
||||
}
|
||||
|
||||
void add270fst (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
|
||||
_Complex TYPE c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = (a[i] * I * I * I) + b[i];
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "vect" } } */
|
||||
|
||||
void addconjfst (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
|
||||
_Complex TYPE c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = ~a[i] + b[i];
|
||||
}
|
||||
|
||||
void addconjsnd (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
|
||||
_Complex TYPE c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = a[i] + ~b[i];
|
||||
}
|
||||
|
||||
void addconjboth (_Complex TYPE a[restrict N], _Complex TYPE b[restrict N],
|
||||
_Complex TYPE c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = ~a[i] + ~b[i];
|
||||
}
|
103
gcc/testsuite/gcc.dg/vect/complex/complex-operations-run.c
Normal file
103
gcc/testsuite/gcc.dg/vect/complex/complex-operations-run.c
Normal file
@ -0,0 +1,103 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target vect_complex_add_double } */
|
||||
/* { dg-add-options arm_v8_3a_complex_neon } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <complex.h>
|
||||
#include <string.h>
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
|
||||
#define PREF old
|
||||
#pragma GCC push_options
|
||||
#pragma GCC optimize ("no-tree-vectorize")
|
||||
# include "complex-operations.c"
|
||||
#pragma GCC pop_options
|
||||
#undef PREF
|
||||
|
||||
#define PREF new
|
||||
# include "complex-operations.c"
|
||||
#undef PREF
|
||||
|
||||
#define TYPE double
|
||||
#define TYPE2 double
|
||||
#define EP pow(2, -45)
|
||||
|
||||
#define xstr(s) str(s)
|
||||
#define str(s) #s
|
||||
|
||||
#define FCMP(A, B) \
|
||||
((fabs (creal (A) - creal (B)) <= EP) && (fabs (cimag (A) - cimag (B)) <= EP))
|
||||
|
||||
#define CMP(A, B) \
|
||||
(FCMP(A,B) ? "PASS" : "FAIL")
|
||||
|
||||
#define COMPARE(A,B) \
|
||||
memset (&c1, 0, sizeof (c1)); \
|
||||
memset (&c2, 0, sizeof (c2)); \
|
||||
A; B; \
|
||||
if (!FCMP(c1[0],c2[0]) || !FCMP(c1[1], c2[1])) \
|
||||
{ \
|
||||
printf ("=> %s vs %s\n", xstr (A), xstr (B)); \
|
||||
printf ("%a\n", creal (c1[0]) - creal (c2[0])); \
|
||||
printf ("%a\n", cimag (c1[1]) - cimag (c2[1])); \
|
||||
printf ("%.2f+%.2fI == %.2f+%.2fI (%s)\n", creal (c1[0]), cimag (c1[0]), creal (c2[0]), cimag (c2[0]), CMP (c1[0], c2[0])); \
|
||||
printf ("%.2f+%.2fI == %.2f+%.2fI (%s)\n", creal (c1[1]), cimag (c1[1]), creal (c2[1]), cimag (c2[1]), CMP (c1[1], c2[1])); \
|
||||
printf ("\n"); \
|
||||
__builtin_abort (); \
|
||||
}
|
||||
|
||||
int main ()
|
||||
{
|
||||
TYPE2 complex a[] = { 1.0 + 3.0 * I, 2.0 + 3.5 * I, 1.0 + 3.0 * I, 2.0 + 3.5 * I, 1.0 + 3.0 * I, 2.0 + 3.5 * I, 1.0 + 3.0 * I, 2.0 + 3.5 * I, 1.0 + 3.0 * I, 2.0 + 3.5 * I, 1.0 + 3.0 * I, 2.0 + 3.5 * I, 1.0 + 3.0 * I, 2.0 + 3.5 * I, 1.0 + 3.0 * I, 2.0 + 3.5 * I, 1.0 + 3.0 * I, 2.0 + 3.5 * I, 1.0 + 3.0 * I, 2.0 + 3.5 * I, 1.0 + 3.0 * I, 2.0 + 3.5 * I, 1.0 + 3.0 * I, 2.0 + 3.5 * I, 1.0 + 3.0 * I, 2.0 + 3.5 * I, 1.0 + 3.0 * I, 2.0 + 3.5 * I, 1.0 + 3.0 * I, 2.0 + 3.5 * I, 1.0 + 3.0 * I, 2.0 + 3.5 * I };
|
||||
TYPE complex b[] = { 1.1 + 3.1 * I, 2.1 + 3.6 * I, 1.1 + 3.1 * I, 2.1 + 3.6 * I, 1.1 + 3.1 * I, 2.1 + 3.6 * I, 1.1 + 3.1 * I, 2.1 + 3.6 * I, 1.1 + 3.1 * I, 2.1 + 3.6 * I, 1.1 + 3.1 * I, 2.1 + 3.6 * I, 1.1 + 3.1 * I, 2.1 + 3.6 * I, 1.1 + 3.1 * I, 2.1 + 3.6 * I, 1.1 + 3.1 * I, 2.1 + 3.6 * I, 1.1 + 3.1 * I, 2.1 + 3.6 * I, 1.1 + 3.1 * I, 2.1 + 3.6 * I, 1.1 + 3.1 * I, 2.1 + 3.6 * I, 1.1 + 3.1 * I, 2.1 + 3.6 * I, 1.1 + 3.1 * I, 2.1 + 3.6 * I, 1.1 + 3.1 * I, 2.1 + 3.6 * I, 1.1 + 3.1 * I, 2.1 + 3.6 * I };
|
||||
TYPE complex c2[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
TYPE complex c1[] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
TYPE diff1, diff2;
|
||||
|
||||
COMPARE(fma0_old(a, b, c1), fma0_new(a, b, c2));
|
||||
COMPARE(fma90_old(a, b, c1), fma90_new(a, b, c2));
|
||||
COMPARE(fma180_old(a, b, c1), fma180_new(a, b, c2));
|
||||
COMPARE(fma270_old(a, b, c1), fma270_new(a, b, c2));
|
||||
COMPARE(fma0_snd_old(a, b, c1), fma0_snd_new(a, b, c2));
|
||||
COMPARE(fma90_snd_old(a, b, c1), fma90_snd_new(a, b, c2));
|
||||
COMPARE(fma180_snd_old(a, b, c1), fma180_snd_new(a, b, c2));
|
||||
COMPARE(fma270_snd_old(a, b, c1), fma270_snd_new(a, b, c2));
|
||||
COMPARE(fma_conj_first_old(a, b, c1), fma_conj_first_new(a, b, c2));
|
||||
COMPARE(fma_conj_second_old(a, b, c1), fma_conj_second_new(a, b, c2));
|
||||
COMPARE(fma_conj_both_old(a, b, c1), fma_conj_both_new(a, b, c2));
|
||||
COMPARE(fms0_old(a, b, c1), fms0_new(a, b, c2));
|
||||
COMPARE(fms90_old(a, b, c1), fms90_new(a, b, c2));
|
||||
COMPARE(fms180_old(a, b, c1), fms180_new(a, b, c2));
|
||||
COMPARE(fms270_old(a, b, c1), fms270_new(a, b, c2));
|
||||
COMPARE(fms0_snd_old(a, b, c1), fms0_snd_new(a, b, c2));
|
||||
COMPARE(fms90_snd_old(a, b, c1), fms90_snd_new(a, b, c2));
|
||||
COMPARE(fms180_snd_old(a, b, c1), fms180_snd_new(a, b, c2));
|
||||
COMPARE(fms270_snd_old(a, b, c1), fms270_snd_new(a, b, c2));
|
||||
COMPARE(fms_conj_first_old(a, b, c1), fms_conj_first_new(a, b, c2));
|
||||
COMPARE(fms_conj_second_old(a, b, c1), fms_conj_second_new(a, b, c2));
|
||||
COMPARE(fms_conj_both_old(a, b, c1), fms_conj_both_new(a, b, c2));
|
||||
COMPARE(mul0_old(a, b, c1), mul0_new(a, b, c2));
|
||||
COMPARE(mul90_old(a, b, c1), mul90_new(a, b, c2));
|
||||
COMPARE(mul180_old(a, b, c1), mul180_new(a, b, c2));
|
||||
COMPARE(mul270_old(a, b, c1), mul270_new(a, b, c2));
|
||||
COMPARE(mul0_snd_old(a, b, c1), mul0_snd_new(a, b, c2));
|
||||
COMPARE(mul90_snd_old(a, b, c1), mul90_snd_new(a, b, c2));
|
||||
COMPARE(mul180_snd_old(a, b, c1), mul180_snd_new(a, b, c2));
|
||||
COMPARE(mul270_snd_old(a, b, c1), mul270_snd_new(a, b, c2));
|
||||
COMPARE(mul_conj_first_old(a, b, c1), mul_conj_first_new(a, b, c2));
|
||||
COMPARE(mul_conj_second_old(a, b, c1), mul_conj_second_new(a, b, c2));
|
||||
COMPARE(mul_conj_both_old(a, b, c1), mul_conj_both_new(a, b, c2));
|
||||
COMPARE(add0_old(a, b, c1), add0_new(a, b, c2));
|
||||
COMPARE(add90_old(a, b, c1), add90_new(a, b, c2));
|
||||
COMPARE(add180_old(a, b, c1), add180_new(a, b, c2));
|
||||
COMPARE(add270_old(a, b, c1), add270_new(a, b, c2));
|
||||
COMPARE(add0_snd_old(a, b, c1), add0_snd_new(a, b, c2));
|
||||
COMPARE(add90_snd_old(a, b, c1), add90_snd_new(a, b, c2));
|
||||
COMPARE(add180_snd_old(a, b, c1), add180_snd_new(a, b, c2));
|
||||
COMPARE(add270_snd_old(a, b, c1), add270_snd_new(a, b, c2));
|
||||
COMPARE(add_conj_first_old(a, b, c1), add_conj_first_new(a, b, c2));
|
||||
COMPARE(add_conj_second_old(a, b, c1), add_conj_second_new(a, b, c2));
|
||||
COMPARE(add_conj_both_old(a, b, c1), add_conj_both_new(a, b, c2));
|
||||
}
|
358
gcc/testsuite/gcc.dg/vect/complex/complex-operations.c
Normal file
358
gcc/testsuite/gcc.dg/vect/complex/complex-operations.c
Normal file
@ -0,0 +1,358 @@
|
||||
#include <stdio.h>
|
||||
#include <complex.h>
|
||||
|
||||
#ifndef PREF
|
||||
#define PREF c
|
||||
#endif
|
||||
|
||||
#define FX(N,P) P ## _ ## N
|
||||
#define MK(N,P) FX(P,N)
|
||||
|
||||
#define N 32
|
||||
#define TYPE double
|
||||
|
||||
// ------ FMA
|
||||
|
||||
// Complex FMA instructions rotating the result
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(fma0, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] += a[i] * b[i];
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(fma90, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] += a[i] * b[i] * I;
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(fma180, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] += a[i] * b[i] * I * I;
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(fma270, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] += a[i] * b[i] * I * I * I;
|
||||
}
|
||||
|
||||
// Complex FMA instructions rotating the second parameter.
|
||||
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(fma0_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] += a[i] * b[i];
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(fma90_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] += a[i] * (b[i] * I);
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(fma180_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] += a[i] * (b[i] * I * I);
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(fma270_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] += a[i] * (b[i] * I * I * I);
|
||||
}
|
||||
|
||||
// Complex FMA instructions with conjucated values.
|
||||
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(fma_conj_first, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] += conj (a[i]) * b[i];
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(fma_conj_second, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] += a[i] * conj (b[i]);
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(fma_conj_both, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] += conj (a[i]) * conj (b[i]);
|
||||
}
|
||||
|
||||
// ----- FMS
|
||||
|
||||
// Complex FMS instructions rotating the result
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(fms0, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] -= a[i] * b[i];
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(fms90, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] -= a[i] * b[i] * I;
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(fms180, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] -= a[i] * b[i] * I * I;
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(fms270, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] -= a[i] * b[i] * I * I * I;
|
||||
}
|
||||
|
||||
// Complex FMS instructions rotating the second parameter.
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(fms0_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] -= a[i] * b[i];
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(fms90_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] -= a[i] * (b[i] * I);
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(fms180_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] -= a[i] * (b[i] * I * I);
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(fms270_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] -= a[i] * (b[i] * I * I * I);
|
||||
}
|
||||
|
||||
// Complex FMS instructions with conjucated values.
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(fms_conj_first, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] -= conj (a[i]) * b[i];
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(fms_conj_second, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] -= a[i] * conj (b[i]);
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(fms_conj_both, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] -= conj (a[i]) * conj (b[i]);
|
||||
}
|
||||
|
||||
|
||||
// ----- MUL
|
||||
|
||||
// Complex MUL instructions rotating the result
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(mul0, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = a[i] * b[i];
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(mul90, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = a[i] * b[i] * I;
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(mul180, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = a[i] * b[i] * I * I;
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(mul270, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = a[i] * b[i] * I * I * I;
|
||||
}
|
||||
|
||||
// Complex MUL instructions rotating the second parameter.
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(mul0_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = a[i] * b[i];
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(mul90_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = a[i] * (b[i] * I);
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(mul180_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = a[i] * (b[i] * I * I);
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(mul270_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = a[i] * (b[i] * I * I * I);
|
||||
}
|
||||
|
||||
// Complex FMS instructions with conjucated values.
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(mul_conj_first, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = conj (a[i]) * b[i];
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(mul_conj_second, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = a[i] * conj (b[i]);
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(mul_conj_both, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = conj (a[i]) * conj (b[i]);
|
||||
}
|
||||
|
||||
|
||||
// ----- ADD
|
||||
|
||||
// Complex ADD instructions rotating the result
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(add0, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = a[i] + b[i];
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(add90, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = (a[i] + b[i]) * I;
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(add180, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = (a[i] + b[i]) * I * I;
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(add270, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = (a[i] + b[i]) * I * I * I;
|
||||
}
|
||||
|
||||
// Complex ADD instructions rotating the second parameter.
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(add0_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = a[i] + b[i];
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(add90_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = a[i] + (b[i] * I);
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(add180_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = a[i] + (b[i] * I * I);
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(add270_snd, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = a[i] + (b[i] * I * I * I);
|
||||
}
|
||||
|
||||
// Complex ADD instructions with conjucated values.
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(add_conj_first, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = conj (a[i]) + b[i];
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(add_conj_second, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = a[i] + conj (b[i]);
|
||||
}
|
||||
|
||||
__attribute__((noinline,noipa))
|
||||
void MK(add_conj_both, PREF) (TYPE complex a[restrict N], TYPE complex b[restrict N], TYPE complex c[restrict N])
|
||||
{
|
||||
for (int i=0; i < N; i++)
|
||||
c[i] = conj (a[i]) + conj (b[i]);
|
||||
}
|
||||
|
||||
|
20
gcc/testsuite/gcc.dg/vect/complex/complex.exp
Normal file
20
gcc/testsuite/gcc.dg/vect/complex/complex.exp
Normal file
@ -0,0 +1,20 @@
|
||||
# Copyright (C) 1997-2020 Free Software Foundation, Inc.
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 3 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with GCC; see the file COPYING3. If not see
|
||||
# <http://www.gnu.org/licenses/>.
|
||||
|
||||
# GCC testsuite that uses the `dg.exp' driver.
|
||||
|
||||
# Load support procs.
|
||||
load_file $srcdir/$subdir/../vect.exp
|
@ -0,0 +1,12 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_double } */
|
||||
/* { dg-add-options arm_v8_3a_complex_neon } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE double
|
||||
#define N 16
|
||||
#include "complex-add-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "slp1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" } } */
|
||||
|
@ -0,0 +1,11 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_float } */
|
||||
/* { dg-add-options arm_v8_3a_complex_neon } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE float
|
||||
#define N 16
|
||||
#include "complex-add-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "slp1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" } } */
|
@ -0,0 +1,13 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_half } */
|
||||
/* { dg-add-options arm_v8_3a_fp16_complex_neon } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE _Float16
|
||||
#define N 16
|
||||
#include "complex-add-template.c"
|
||||
|
||||
/* Vectorization is failing for these cases. They should work but for now ignore. */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "slp1" { xfail *-*-* } } } */
|
@ -0,0 +1,11 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_double } */
|
||||
/* { dg-add-options arm_v8_3a_complex_neon } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE double
|
||||
#define N 16
|
||||
#include "complex-add-pattern-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "slp1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" } } */
|
@ -0,0 +1,11 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_float } */
|
||||
/* { dg-add-options arm_v8_3a_complex_neon } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE float
|
||||
#define N 16
|
||||
#include "complex-add-pattern-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "slp1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" } } */
|
@ -0,0 +1,11 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_half } */
|
||||
/* { dg-add-options arm_v8_3a_fp16_complex_neon } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE _Float16
|
||||
#define N 16
|
||||
#include "complex-add-pattern-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "slp1" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "slp1" { xfail arm*-*-* } } } */
|
@ -0,0 +1,11 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_double } */
|
||||
/* { dg-add-options arm_v8_3a_complex_neon } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE double
|
||||
#define N 200
|
||||
#include "complex-add-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 2 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 2 "vect" } } */
|
@ -0,0 +1,11 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_float } */
|
||||
/* { dg-add-options arm_v8_3a_complex_neon } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE float
|
||||
#define N 200
|
||||
#include "complex-add-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 2 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 2 "vect" } } */
|
@ -0,0 +1,11 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_half } */
|
||||
/* { dg-add-options arm_v8_3a_fp16_complex_neon } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE _Float16
|
||||
#define N 200
|
||||
#include "complex-add-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 2 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 2 "vect" } } */
|
@ -0,0 +1,11 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_double } */
|
||||
/* { dg-add-options arm_v8_3a_complex_neon } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE double
|
||||
#define N 200
|
||||
#include "complex-add-pattern-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 4 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "vect" } } */
|
@ -0,0 +1,11 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_float } */
|
||||
/* { dg-add-options arm_v8_3a_complex_neon } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE float
|
||||
#define N 200
|
||||
#include "complex-add-pattern-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 4 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "vect" } } */
|
@ -0,0 +1,12 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_half } */
|
||||
/* { dg-add-options arm_v8_3a_fp16_complex_neon } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE _Float16
|
||||
#define N 200
|
||||
#include "complex-add-pattern-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 4 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "vect" } } */
|
||||
|
@ -0,0 +1,12 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_byte } */
|
||||
/* { dg-require-effective-target stdint_types } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE int8_t
|
||||
#define N 200
|
||||
#include <stdint.h>
|
||||
#include "complex-add-pattern-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "vect" } } */
|
@ -0,0 +1,12 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_int } */
|
||||
/* { dg-require-effective-target stdint_types } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE int32_t
|
||||
#define N 200
|
||||
#include <stdint.h>
|
||||
#include "complex-add-pattern-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "vect" } } */
|
@ -0,0 +1,12 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_long } */
|
||||
/* { dg-require-effective-target stdint_types } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE int64_t
|
||||
#define N 200
|
||||
#include <stdint.h>
|
||||
#include "complex-add-pattern-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "vect" } } */
|
@ -0,0 +1,12 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_short } */
|
||||
/* { dg-require-effective-target stdint_types } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE int16_t
|
||||
#define N 200
|
||||
#include <stdint.h>
|
||||
#include "complex-add-pattern-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "vect" } } */
|
@ -0,0 +1,12 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_byte } */
|
||||
/* { dg-require-effective-target stdint_types } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE uint8_t
|
||||
#define N 200
|
||||
#include <stdint.h>
|
||||
#include "complex-add-pattern-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "vect" } } */
|
@ -0,0 +1,12 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_int } */
|
||||
/* { dg-require-effective-target stdint_types } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE uint32_t
|
||||
#define N 200
|
||||
#include <stdint.h>
|
||||
#include "complex-add-pattern-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "vect" } } */
|
@ -0,0 +1,12 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_long } */
|
||||
/* { dg-require-effective-target stdint_types } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE uint64_t
|
||||
#define N 200
|
||||
#include <stdint.h>
|
||||
#include "complex-add-pattern-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "vect" } } */
|
@ -0,0 +1,12 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-require-effective-target vect_complex_add_short } */
|
||||
/* { dg-require-effective-target stdint_types } */
|
||||
/* { dg-add-options arm_v8_1m_mve_fp } */
|
||||
|
||||
#define TYPE uint16_t
|
||||
#define N 200
|
||||
#include <stdint.h>
|
||||
#include "complex-add-pattern-template.c"
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT90" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "stmt.*COMPLEX_ADD_ROT270" 1 "vect" } } */
|
@ -3367,6 +3367,115 @@ proc check_effective_target_vect_int { } {
|
||||
}}]
|
||||
}
|
||||
|
||||
# Return 1 if the target supports hardware vectorization of complex additions of
|
||||
# byte, 0 otherwise.
|
||||
#
|
||||
# This won't change for different subtargets so cache the result.
|
||||
|
||||
proc check_effective_target_vect_complex_add_byte { } {
|
||||
return [check_cached_effective_target_indexed vect_complex_add_byte {
|
||||
expr {
|
||||
([check_effective_target_aarch64_sve2]
|
||||
&& [check_effective_target_aarch64_little_endian])
|
||||
|| ([check_effective_target_arm_v8_1m_mve_fp_ok]
|
||||
&& [check_effective_target_arm_little_endian])
|
||||
}}]
|
||||
}
|
||||
|
||||
# Return 1 if the target supports hardware vectorization of complex additions of
|
||||
# short, 0 otherwise.
|
||||
#
|
||||
# This won't change for different subtargets so cache the result.
|
||||
|
||||
proc check_effective_target_vect_complex_add_short { } {
|
||||
return [check_cached_effective_target_indexed vect_complex_add_short {
|
||||
expr {
|
||||
([check_effective_target_aarch64_sve2]
|
||||
&& [check_effective_target_aarch64_little_endian])
|
||||
|| ([check_effective_target_arm_v8_1m_mve_fp_ok]
|
||||
&& [check_effective_target_arm_little_endian])
|
||||
}}]
|
||||
}
|
||||
|
||||
# Return 1 if the target supports hardware vectorization of complex additions of
|
||||
# int, 0 otherwise.
|
||||
#
|
||||
# This won't change for different subtargets so cache the result.
|
||||
|
||||
proc check_effective_target_vect_complex_add_int { } {
|
||||
return [check_cached_effective_target_indexed vect_complex_add_int {
|
||||
expr {
|
||||
([check_effective_target_aarch64_sve2]
|
||||
&& [check_effective_target_aarch64_little_endian])
|
||||
|| ([check_effective_target_arm_v8_1m_mve_fp_ok]
|
||||
&& [check_effective_target_arm_little_endian])
|
||||
}}]
|
||||
}
|
||||
|
||||
# Return 1 if the target supports hardware vectorization of complex additions of
|
||||
# long, 0 otherwise.
|
||||
#
|
||||
# This won't change for different subtargets so cache the result.
|
||||
|
||||
proc check_effective_target_vect_complex_add_long { } {
|
||||
return [check_cached_effective_target_indexed vect_complex_add_long {
|
||||
expr {
|
||||
([check_effective_target_aarch64_sve2]
|
||||
&& [check_effective_target_aarch64_little_endian])
|
||||
|| ([check_effective_target_arm_v8_1m_mve_fp_ok]
|
||||
&& [check_effective_target_arm_little_endian])
|
||||
}}]
|
||||
}
|
||||
|
||||
# Return 1 if the target supports hardware vectorization of complex additions of
|
||||
# half, 0 otherwise.
|
||||
#
|
||||
# This won't change for different subtargets so cache the result.
|
||||
|
||||
proc check_effective_target_vect_complex_add_half { } {
|
||||
return [check_cached_effective_target_indexed vect_complex_add_half {
|
||||
expr {
|
||||
([check_effective_target_arm_v8_3a_fp16_complex_neon_ok]
|
||||
&& ([check_effective_target_aarch64_little_endian]
|
||||
|| [check_effective_target_arm_little_endian]))
|
||||
|| ([check_effective_target_aarch64_sve2]
|
||||
&& [check_effective_target_aarch64_little_endian])
|
||||
|| ([check_effective_target_arm_v8_1m_mve_fp_ok]
|
||||
&& [check_effective_target_arm_little_endian])
|
||||
}}]
|
||||
}
|
||||
|
||||
# Return 1 if the target supports hardware vectorization of complex additions of
|
||||
# float, 0 otherwise.
|
||||
#
|
||||
# This won't change for different subtargets so cache the result.
|
||||
|
||||
proc check_effective_target_vect_complex_add_float { } {
|
||||
return [check_cached_effective_target_indexed vect_complex_add_float {
|
||||
expr {
|
||||
([check_effective_target_arm_v8_3a_complex_neon_ok]
|
||||
&& ([check_effective_target_aarch64_little_endian]
|
||||
|| [check_effective_target_arm_little_endian]))
|
||||
|| ([check_effective_target_aarch64_sve2]
|
||||
&& [check_effective_target_aarch64_little_endian])
|
||||
|| ([check_effective_target_arm_v8_1m_mve_fp_ok]
|
||||
&& [check_effective_target_arm_little_endian])
|
||||
}}]
|
||||
}
|
||||
|
||||
# Return 1 if the target supports hardware vectorization of complex additions of
|
||||
# double, 0 otherwise.
|
||||
#
|
||||
# This won't change for different subtargets so cache the result.
|
||||
|
||||
proc check_effective_target_vect_complex_add_double { } {
|
||||
return [check_cached_effective_target_indexed vect_complex_add_double {
|
||||
expr {
|
||||
([check_effective_target_aarch64_sve2]
|
||||
&& [check_effective_target_aarch64_little_endian])
|
||||
}}]
|
||||
}
|
||||
|
||||
# Return 1 if the target supports signed int->float conversion
|
||||
#
|
||||
|
||||
@ -10386,13 +10495,13 @@ proc check_effective_target_arm_v8_3a_complex_neon_ok_nocache { } {
|
||||
# need to be added to the -march option.
|
||||
foreach flags {"" "-mfloat-abi=softfp -mfpu=auto" "-mfloat-abi=hard -mfpu=auto"} {
|
||||
if { [check_no_compiler_messages_nocache \
|
||||
arm_v8_3a_complex_neon_ok object {
|
||||
arm_v8_3a_complex_neon_ok assembly {
|
||||
#if !defined (__ARM_FEATURE_COMPLEX)
|
||||
#error "__ARM_FEATURE_COMPLEX not defined"
|
||||
#endif
|
||||
} "$flags -march=armv8.3-a"] } {
|
||||
set et_arm_v8_3a_complex_neon_flags "$flags -march=armv8.3-a"
|
||||
return 1
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
@ -10412,13 +10521,57 @@ proc add_options_for_arm_v8_3a_complex_neon { flags } {
|
||||
return "$flags $et_arm_v8_3a_complex_neon_flags"
|
||||
}
|
||||
|
||||
# Return 1 if the target supports ARMv8.3 Adv.SIMD + FP16 Complex instructions
|
||||
# instructions, 0 otherwise. The test is valid for ARM and for AArch64.
|
||||
# Record the command line options needed.
|
||||
|
||||
proc check_effective_target_arm_v8_3a_fp16_complex_neon_ok_nocache { } {
|
||||
global et_arm_v8_3a_fp16_complex_neon_flags
|
||||
set et_arm_v8_3a_fp16_complex_neon_flags ""
|
||||
|
||||
if { ![istarget arm*-*-*] && ![istarget aarch64*-*-*] } {
|
||||
return 0;
|
||||
}
|
||||
|
||||
# Iterate through sets of options to find the compiler flags that
|
||||
# need to be added to the -march option.
|
||||
foreach flags {"" "-mfloat-abi=softfp -mfpu=auto" "-mfloat-abi=hard -mfpu=auto"} {
|
||||
if { [check_no_compiler_messages_nocache \
|
||||
arm_v8_3a_fp16_complex_neon_ok assembly {
|
||||
#if !defined (__ARM_FEATURE_COMPLEX)
|
||||
#error "__ARM_FEATURE_COMPLEX not defined"
|
||||
#endif
|
||||
} "$flags -march=armv8.3-a+fp16"] } {
|
||||
set et_arm_v8_3a_fp16_complex_neon_flags \
|
||||
"$flags -march=armv8.3-a+fp16"
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
proc check_effective_target_arm_v8_3a_fp16_complex_neon_ok { } {
|
||||
return [check_cached_effective_target arm_v8_3a_fp16_complex_neon_ok \
|
||||
check_effective_target_arm_v8_3a_fp16_complex_neon_ok_nocache]
|
||||
}
|
||||
|
||||
proc add_options_for_arm_v8_3a_fp16_complex_neon { flags } {
|
||||
if { ! [check_effective_target_arm_v8_3a_fp16_complex_neon_ok] } {
|
||||
return "$flags"
|
||||
}
|
||||
global et_arm_v8_3a_fp16_complex_neon_flags
|
||||
return "$flags $et_arm_v8_3a_fp16_complex_neon_flags"
|
||||
}
|
||||
|
||||
|
||||
# Return 1 if the target supports executing AdvSIMD instructions from ARMv8.3
|
||||
# with the complex instruction extension, 0 otherwise. The test is valid for
|
||||
# ARM and for AArch64.
|
||||
|
||||
proc check_effective_target_arm_v8_3a_complex_neon_hw { } {
|
||||
if { ![check_effective_target_arm_v8_3a_complex_neon_ok] } {
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
return [check_runtime arm_v8_3a_complex_neon_hw_available {
|
||||
#include "arm_neon.h"
|
||||
@ -10443,7 +10596,7 @@ proc check_effective_target_arm_v8_3a_complex_neon_hw { } {
|
||||
: /* No clobbers. */);
|
||||
#endif
|
||||
|
||||
return (results[0] == 8 && results[1] == 24) ? 1 : 0;
|
||||
return (results[0] == 8 && results[1] == 24) ? 0 : 1;
|
||||
}
|
||||
} [add_options_for_arm_v8_3a_complex_neon ""]]
|
||||
}
|
||||
|
@ -2698,9 +2698,13 @@ again:
|
||||
STMT_SLP_TYPE (stmt_info) = loop_vect;
|
||||
if (STMT_VINFO_IN_PATTERN_P (stmt_info))
|
||||
{
|
||||
stmt_vec_info pattern_stmt_info
|
||||
= STMT_VINFO_RELATED_STMT (stmt_info);
|
||||
if (STMT_VINFO_SLP_VECT_ONLY (pattern_stmt_info))
|
||||
STMT_VINFO_IN_PATTERN_P (stmt_info) = false;
|
||||
|
||||
gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
|
||||
stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
|
||||
STMT_SLP_TYPE (stmt_info) = loop_vect;
|
||||
STMT_SLP_TYPE (pattern_stmt_info) = loop_vect;
|
||||
for (gimple_stmt_iterator pi = gsi_start (pattern_def_seq);
|
||||
!gsi_end_p (pi); gsi_next (&pi))
|
||||
STMT_SLP_TYPE (loop_vinfo->lookup_stmt (gsi_stmt (pi)))
|
||||
|
720
gcc/tree-vect-slp-patterns.c
Normal file
720
gcc/tree-vect-slp-patterns.c
Normal file
@ -0,0 +1,720 @@
|
||||
/* SLP - Pattern matcher on SLP trees
|
||||
Copyright (C) 2020 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
GCC is free software; you can redistribute it and/or modify it under
|
||||
the terms of the GNU General Public License as published by the Free
|
||||
Software Foundation; either version 3, or (at your option) any later
|
||||
version.
|
||||
|
||||
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
||||
WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
||||
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU General Public License
|
||||
along with GCC; see the file COPYING3. If not see
|
||||
<http://www.gnu.org/licenses/>. */
|
||||
|
||||
#include "config.h"
|
||||
#include "system.h"
|
||||
#include "coretypes.h"
|
||||
#include "backend.h"
|
||||
#include "target.h"
|
||||
#include "rtl.h"
|
||||
#include "tree.h"
|
||||
#include "gimple.h"
|
||||
#include "tree-pass.h"
|
||||
#include "ssa.h"
|
||||
#include "optabs-tree.h"
|
||||
#include "insn-config.h"
|
||||
#include "recog.h" /* FIXME: for insn_data */
|
||||
#include "fold-const.h"
|
||||
#include "stor-layout.h"
|
||||
#include "gimple-iterator.h"
|
||||
#include "cfgloop.h"
|
||||
#include "tree-vectorizer.h"
|
||||
#include "langhooks.h"
|
||||
#include "gimple-walk.h"
|
||||
#include "dbgcnt.h"
|
||||
#include "tree-vector-builder.h"
|
||||
#include "vec-perm-indices.h"
|
||||
#include "gimple-fold.h"
|
||||
#include "internal-fn.h"
|
||||
|
||||
/* SLP Pattern matching mechanism.
|
||||
|
||||
This extension to the SLP vectorizer allows one to transform the generated SLP
|
||||
tree based on any pattern. The difference between this and the normal vect
|
||||
pattern matcher is that unlike the former, this matcher allows you to match
|
||||
with instructions that do not belong to the same SSA dominator graph.
|
||||
|
||||
The only requirement that this pattern matcher has is that you are only
|
||||
only allowed to either match an entire group or none.
|
||||
|
||||
The pattern matcher currently only allows you to perform replacements to
|
||||
internal functions.
|
||||
|
||||
Once the patterns are matched it is one way, these cannot be undone. It is
|
||||
currently not supported to match patterns recursively.
|
||||
|
||||
To add a new pattern, implement the vect_pattern class and add the type to
|
||||
slp_patterns.
|
||||
|
||||
*/
|
||||
|
||||
/*******************************************************************************
|
||||
* vect_pattern class
|
||||
******************************************************************************/
|
||||
|
||||
/* Default implementation of recognize that performs matching, validation and
|
||||
replacement of nodes but that can be overriden if required. */
|
||||
|
||||
static bool
|
||||
vect_pattern_validate_optab (internal_fn ifn, slp_tree node)
|
||||
{
|
||||
tree vectype = SLP_TREE_VECTYPE (node);
|
||||
if (ifn == IFN_LAST || !vectype)
|
||||
return false;
|
||||
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"Found %s pattern in SLP tree\n",
|
||||
internal_fn_name (ifn));
|
||||
|
||||
if (direct_internal_fn_supported_p (ifn, vectype, OPTIMIZE_FOR_SPEED))
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"Target supports %s vectorization with mode %T\n",
|
||||
internal_fn_name (ifn), vectype);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
{
|
||||
if (!vectype)
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"Target does not support vector type for %T\n",
|
||||
SLP_TREE_DEF_TYPE (node));
|
||||
else
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"Target does not support %s for vector type "
|
||||
"%T\n", internal_fn_name (ifn), vectype);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/*******************************************************************************
|
||||
* General helper types
|
||||
******************************************************************************/
|
||||
|
||||
/* The COMPLEX_OPERATION enum denotes the possible pair of operations that can
|
||||
be matched when looking for expressions that we are interested matching for
|
||||
complex numbers addition and mla. */
|
||||
|
||||
typedef enum _complex_operation : unsigned {
|
||||
PLUS_PLUS,
|
||||
MINUS_PLUS,
|
||||
PLUS_MINUS,
|
||||
MULT_MULT,
|
||||
CMPLX_NONE
|
||||
} complex_operation_t;
|
||||
|
||||
/*******************************************************************************
|
||||
* General helper functions
|
||||
******************************************************************************/
|
||||
|
||||
/* Helper function of linear_loads_p that checks to see if the load permutation
|
||||
is sequential and in monotonically increasing order of loads with no gaps.
|
||||
*/
|
||||
|
||||
static inline complex_perm_kinds_t
|
||||
is_linear_load_p (load_permutation_t loads)
|
||||
{
|
||||
if (loads.length() == 0)
|
||||
return PERM_UNKNOWN;
|
||||
|
||||
unsigned load, i;
|
||||
complex_perm_kinds_t candidates[4]
|
||||
= { PERM_EVENODD
|
||||
, PERM_ODDEVEN
|
||||
, PERM_ODDODD
|
||||
, PERM_EVENEVEN
|
||||
};
|
||||
|
||||
int valid_patterns = 4;
|
||||
FOR_EACH_VEC_ELT_FROM (loads, i, load, 1)
|
||||
{
|
||||
if (candidates[0] != PERM_UNKNOWN && load != i)
|
||||
{
|
||||
candidates[0] = PERM_UNKNOWN;
|
||||
valid_patterns--;
|
||||
}
|
||||
if (candidates[1] != PERM_UNKNOWN
|
||||
&& load != (i % 2 == 0 ? i + 1 : i - 1))
|
||||
{
|
||||
candidates[1] = PERM_UNKNOWN;
|
||||
valid_patterns--;
|
||||
}
|
||||
if (candidates[2] != PERM_UNKNOWN && load != 1)
|
||||
{
|
||||
candidates[2] = PERM_UNKNOWN;
|
||||
valid_patterns--;
|
||||
}
|
||||
if (candidates[3] != PERM_UNKNOWN && load != 0)
|
||||
{
|
||||
candidates[3] = PERM_UNKNOWN;
|
||||
valid_patterns--;
|
||||
}
|
||||
|
||||
if (valid_patterns == 0)
|
||||
return PERM_UNKNOWN;
|
||||
}
|
||||
|
||||
for (i = 0; i < sizeof(candidates); i++)
|
||||
if (candidates[i] != PERM_UNKNOWN)
|
||||
return candidates[i];
|
||||
|
||||
return PERM_UNKNOWN;
|
||||
}
|
||||
|
||||
/* Combine complex_perm_kinds A and B into a new permute kind that describes the
|
||||
resulting operation. */
|
||||
|
||||
static inline complex_perm_kinds_t
|
||||
vect_merge_perms (complex_perm_kinds_t a, complex_perm_kinds_t b)
|
||||
{
|
||||
if (a == b)
|
||||
return a;
|
||||
|
||||
if (a == PERM_TOP)
|
||||
return b;
|
||||
|
||||
if (b == PERM_TOP)
|
||||
return a;
|
||||
|
||||
return PERM_UNKNOWN;
|
||||
}
|
||||
|
||||
/* Check to see if all loads rooted in ROOT are linear. Linearity is
|
||||
defined as having no gaps between values loaded. */
|
||||
|
||||
static complex_load_perm_t
|
||||
linear_loads_p (slp_tree_to_load_perm_map_t *perm_cache, slp_tree root)
|
||||
{
|
||||
if (!root)
|
||||
return std::make_pair (PERM_UNKNOWN, vNULL);
|
||||
|
||||
unsigned i;
|
||||
complex_load_perm_t *tmp;
|
||||
|
||||
if ((tmp = perm_cache->get (root)) != NULL)
|
||||
return *tmp;
|
||||
|
||||
complex_load_perm_t retval = std::make_pair (PERM_UNKNOWN, vNULL);
|
||||
perm_cache->put (root, retval);
|
||||
|
||||
/* If it's a load node, then just read the load permute. */
|
||||
if (SLP_TREE_LOAD_PERMUTATION (root).exists ())
|
||||
{
|
||||
retval.first = is_linear_load_p (SLP_TREE_LOAD_PERMUTATION (root));
|
||||
retval.second = SLP_TREE_LOAD_PERMUTATION (root);
|
||||
perm_cache->put (root, retval);
|
||||
return retval;
|
||||
}
|
||||
else if (SLP_TREE_DEF_TYPE (root) != vect_internal_def)
|
||||
{
|
||||
retval.first = PERM_TOP;
|
||||
return retval;
|
||||
}
|
||||
|
||||
auto_vec<load_permutation_t> all_loads;
|
||||
complex_perm_kinds_t kind = PERM_TOP;
|
||||
|
||||
slp_tree child;
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (root), i, child)
|
||||
{
|
||||
complex_load_perm_t res = linear_loads_p (perm_cache, child);
|
||||
kind = vect_merge_perms (kind, res.first);
|
||||
if (kind == PERM_UNKNOWN)
|
||||
return retval;
|
||||
all_loads.safe_push (res.second);
|
||||
}
|
||||
|
||||
if (SLP_TREE_LANE_PERMUTATION (root).exists ())
|
||||
{
|
||||
lane_permutation_t perm = SLP_TREE_LANE_PERMUTATION (root);
|
||||
load_permutation_t nloads;
|
||||
nloads.create (SLP_TREE_LANES (root));
|
||||
nloads.quick_grow (SLP_TREE_LANES (root));
|
||||
for (i = 0; i < SLP_TREE_LANES (root); i++)
|
||||
nloads[i] = all_loads[perm[i].first][perm[i].second];
|
||||
|
||||
retval.first = kind;
|
||||
retval.second = nloads;
|
||||
}
|
||||
else if (all_loads.length () == 1)
|
||||
{
|
||||
retval.first = kind;
|
||||
retval.second = all_loads[0];
|
||||
}
|
||||
|
||||
perm_cache->put (root, retval);
|
||||
return retval;
|
||||
}
|
||||
|
||||
|
||||
/* This function attempts to make a node rooted in NODE is linear. If the node
|
||||
if already linear than the node itself is returned in RESULT.
|
||||
|
||||
If the node is not linear then a new VEC_PERM_EXPR node is created with a
|
||||
lane permute that when applied will make the node linear. If such a
|
||||
permute cannot be created then FALSE is returned from the function.
|
||||
|
||||
Here linearity is defined as having a sequential, monotically increasing
|
||||
load position inside the load permute generated by the loads reachable from
|
||||
NODE. */
|
||||
|
||||
static slp_tree
|
||||
vect_build_swap_evenodd_node (slp_tree node)
|
||||
{
|
||||
/* Attempt to linearise the permute. */
|
||||
vec<std::pair<unsigned, unsigned> > zipped;
|
||||
zipped.create (SLP_TREE_LANES (node));
|
||||
|
||||
for (unsigned x = 0; x < SLP_TREE_LANES (node); x+=2)
|
||||
{
|
||||
zipped.quick_push (std::make_pair (0, x+1));
|
||||
zipped.quick_push (std::make_pair (0, x));
|
||||
}
|
||||
|
||||
/* Create the new permute node and store it instead. */
|
||||
slp_tree vnode = vect_create_new_slp_node (1, VEC_PERM_EXPR);
|
||||
SLP_TREE_LANE_PERMUTATION (vnode) = zipped;
|
||||
SLP_TREE_VECTYPE (vnode) = SLP_TREE_VECTYPE (node);
|
||||
SLP_TREE_CHILDREN (vnode).quick_push (node);
|
||||
SLP_TREE_REF_COUNT (vnode) = 1;
|
||||
SLP_TREE_LANES (vnode) = SLP_TREE_LANES (node);
|
||||
SLP_TREE_REPRESENTATIVE (vnode) = SLP_TREE_REPRESENTATIVE (node);
|
||||
SLP_TREE_REF_COUNT (node)++;
|
||||
return vnode;
|
||||
}
|
||||
|
||||
/* Checks to see of the expression represented by NODE is a gimple assign with
|
||||
code CODE. */
|
||||
|
||||
static inline bool
|
||||
vect_match_expression_p (slp_tree node, tree_code code)
|
||||
{
|
||||
if (!node
|
||||
|| !SLP_TREE_REPRESENTATIVE (node))
|
||||
return false;
|
||||
|
||||
gimple* expr = STMT_VINFO_STMT (SLP_TREE_REPRESENTATIVE (node));
|
||||
if (!is_gimple_assign (expr)
|
||||
|| gimple_assign_rhs_code (expr) != code)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Check if the given lane permute in PERMUTES matches an alternating sequence
|
||||
of {even odd even odd ...}. This to account for unrolled loops. Further
|
||||
mode there resulting permute must be linear. */
|
||||
|
||||
static inline bool
|
||||
vect_check_evenodd_blend (lane_permutation_t &permutes,
|
||||
unsigned even, unsigned odd)
|
||||
{
|
||||
if (permutes.length () == 0)
|
||||
return false;
|
||||
|
||||
unsigned val[2] = {even, odd};
|
||||
unsigned seed = 0;
|
||||
for (unsigned i = 0; i < permutes.length (); i++)
|
||||
if (permutes[i].first != val[i % 2]
|
||||
|| permutes[i].second != seed++)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
/* This function will match the two gimple expressions representing NODE1 and
|
||||
NODE2 in parallel and returns the pair operation that represents the two
|
||||
expressions in the two statements.
|
||||
|
||||
If match is successful then the corresponding complex_operation is
|
||||
returned and the arguments to the two matched operations are returned in OPS.
|
||||
|
||||
If TWO_OPERANDS it is expected that the LANES of the parent VEC_PERM select
|
||||
from the two nodes alternatingly.
|
||||
|
||||
If unsuccessful then CMPLX_NONE is returned and OPS is untouched.
|
||||
|
||||
e.g. the following gimple statements
|
||||
|
||||
stmt 0 _39 = _37 + _12;
|
||||
stmt 1 _6 = _38 - _36;
|
||||
|
||||
will return PLUS_MINUS along with OPS containing {_37, _12, _38, _36}.
|
||||
*/
|
||||
|
||||
static complex_operation_t
|
||||
vect_detect_pair_op (slp_tree node1, slp_tree node2, lane_permutation_t &lanes,
|
||||
bool two_operands = true, vec<slp_tree> *ops = NULL)
|
||||
{
|
||||
complex_operation_t result = CMPLX_NONE;
|
||||
|
||||
if (vect_match_expression_p (node1, MINUS_EXPR)
|
||||
&& vect_match_expression_p (node2, PLUS_EXPR)
|
||||
&& (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
|
||||
result = MINUS_PLUS;
|
||||
else if (vect_match_expression_p (node1, PLUS_EXPR)
|
||||
&& vect_match_expression_p (node2, MINUS_EXPR)
|
||||
&& (!two_operands || vect_check_evenodd_blend (lanes, 0, 1)))
|
||||
result = PLUS_MINUS;
|
||||
else if (vect_match_expression_p (node1, PLUS_EXPR)
|
||||
&& vect_match_expression_p (node2, PLUS_EXPR))
|
||||
result = PLUS_PLUS;
|
||||
else if (vect_match_expression_p (node1, MULT_EXPR)
|
||||
&& vect_match_expression_p (node2, MULT_EXPR))
|
||||
result = MULT_MULT;
|
||||
|
||||
if (result != CMPLX_NONE && ops != NULL)
|
||||
{
|
||||
ops->create (2);
|
||||
ops->quick_push (node1);
|
||||
ops->quick_push (node2);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* Overload of vect_detect_pair_op that matches against the representative
|
||||
statements in the children of NODE. It is expected that NODE has exactly
|
||||
two children and when TWO_OPERANDS then NODE must be a VEC_PERM. */
|
||||
|
||||
static complex_operation_t
|
||||
vect_detect_pair_op (slp_tree node, bool two_operands = true,
|
||||
vec<slp_tree> *ops = NULL)
|
||||
{
|
||||
if (!two_operands && SLP_TREE_CODE (node) == VEC_PERM_EXPR)
|
||||
return CMPLX_NONE;
|
||||
|
||||
if (SLP_TREE_CHILDREN (node).length () != 2)
|
||||
return CMPLX_NONE;
|
||||
|
||||
vec<slp_tree> children = SLP_TREE_CHILDREN (node);
|
||||
lane_permutation_t &lanes = SLP_TREE_LANE_PERMUTATION (node);
|
||||
|
||||
return vect_detect_pair_op (children[0], children[1], lanes, two_operands,
|
||||
ops);
|
||||
}
|
||||
|
||||
/*******************************************************************************
|
||||
* complex_pattern class
|
||||
******************************************************************************/
|
||||
|
||||
/* SLP Complex Numbers pattern matching.
|
||||
|
||||
As an example, the following simple loop:
|
||||
|
||||
double a[restrict N]; double b[restrict N]; double c[restrict N];
|
||||
|
||||
for (int i=0; i < N; i+=2)
|
||||
{
|
||||
c[i] = a[i] - b[i+1];
|
||||
c[i+1] = a[i+1] + b[i];
|
||||
}
|
||||
|
||||
which represents a complex addition on with a rotation of 90* around the
|
||||
argand plane. i.e. if `a` and `b` were complex numbers then this would be the
|
||||
same as `a + (b * I)`.
|
||||
|
||||
Here the expressions for `c[i]` and `c[i+1]` are independent but have to be
|
||||
both recognized in order for the pattern to work. As an SLP tree this is
|
||||
represented as
|
||||
|
||||
+--------------------------------+
|
||||
| stmt 0 *_9 = _10; |
|
||||
| stmt 1 *_15 = _16; |
|
||||
+--------------------------------+
|
||||
|
|
||||
|
|
||||
v
|
||||
+--------------------------------+
|
||||
| stmt 0 _10 = _4 - _8; |
|
||||
| stmt 1 _16 = _12 + _14; |
|
||||
| lane permutation { 0[0] 1[1] } |
|
||||
+--------------------------------+
|
||||
| |
|
||||
| |
|
||||
| |
|
||||
+-----+ | | +-----+
|
||||
| | | | | |
|
||||
+-----| { } |<-----+ +----->| { } --------+
|
||||
| | | +------------------| | |
|
||||
| +-----+ | +-----+ |
|
||||
| | | |
|
||||
| | | |
|
||||
| +------|------------------+ |
|
||||
| | | |
|
||||
v v v v
|
||||
+--------------------------+ +--------------------------------+
|
||||
| stmt 0 _8 = *_7; | | stmt 0 _4 = *_3; |
|
||||
| stmt 1 _14 = *_13; | | stmt 1 _12 = *_11; |
|
||||
| load permutation { 1 0 } | | load permutation { 0 1 } |
|
||||
+--------------------------+ +--------------------------------+
|
||||
|
||||
The pattern matcher allows you to replace both statements 0 and 1 or none at
|
||||
all. Because this operation is a two operands operation the actual nodes
|
||||
being replaced are those in the { } nodes. The actual scalar statements
|
||||
themselves are not replaced or used during the matching but instead the
|
||||
SLP_TREE_REPRESENTATIVE statements are inspected. You are also allowed to
|
||||
replace and match on any number of nodes.
|
||||
|
||||
Because the pattern matcher matches on the representative statement for the
|
||||
SLP node the case of two_operators it allows you to match the children of the
|
||||
node. This is done using the method `recognize ()`.
|
||||
|
||||
*/
|
||||
|
||||
/* The complex_pattern class contains common code for pattern matchers that work
|
||||
on complex numbers. These provide functionality to allow de-construction and
|
||||
validation of sequences depicting/transforming REAL and IMAG pairs. */
|
||||
|
||||
class complex_pattern : public vect_pattern
|
||||
{
|
||||
protected:
|
||||
auto_vec<slp_tree> m_workset;
|
||||
complex_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
|
||||
: vect_pattern (node, m_ops, ifn)
|
||||
{
|
||||
this->m_workset.safe_push (*node);
|
||||
}
|
||||
|
||||
public:
|
||||
void build (vec_info *);
|
||||
|
||||
static internal_fn
|
||||
matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
|
||||
vec<slp_tree> *);
|
||||
};
|
||||
|
||||
/* Create a replacement pattern statement for each node in m_node and inserts
|
||||
the new statement into m_node as the new representative statement. The old
|
||||
statement is marked as being in a pattern defined by the new statement. The
|
||||
statement is created as call to internal function IFN with m_num_args
|
||||
arguments.
|
||||
|
||||
Futhermore the new pattern is also added to the vectorization information
|
||||
structure VINFO and the old statement STMT_INFO is marked as unused while
|
||||
the new statement is marked as used and the number of SLP uses of the new
|
||||
statement is incremented.
|
||||
|
||||
The newly created SLP nodes are marked as SLP only and will be dissolved
|
||||
if SLP is aborted.
|
||||
|
||||
The newly created gimple call is returned and the BB remains unchanged.
|
||||
|
||||
This default method is designed to only match against simple operands where
|
||||
all the input and output types are the same.
|
||||
*/
|
||||
|
||||
void
|
||||
complex_pattern::build (vec_info *vinfo)
|
||||
{
|
||||
stmt_vec_info stmt_info;
|
||||
|
||||
auto_vec<tree> args;
|
||||
args.create (this->m_num_args);
|
||||
args.quick_grow_cleared (this->m_num_args);
|
||||
slp_tree node;
|
||||
unsigned ix;
|
||||
stmt_vec_info call_stmt_info;
|
||||
gcall *call_stmt = NULL;
|
||||
|
||||
/* Now modify the nodes themselves. */
|
||||
FOR_EACH_VEC_ELT (this->m_workset, ix, node)
|
||||
{
|
||||
/* Calculate the location of the statement in NODE to replace. */
|
||||
stmt_info = SLP_TREE_REPRESENTATIVE (node);
|
||||
gimple* old_stmt = STMT_VINFO_STMT (stmt_info);
|
||||
tree lhs_old_stmt = gimple_get_lhs (old_stmt);
|
||||
tree type = TREE_TYPE (lhs_old_stmt);
|
||||
|
||||
/* Create the argument set for use by gimple_build_call_internal_vec. */
|
||||
for (unsigned i = 0; i < this->m_num_args; i++)
|
||||
args[i] = lhs_old_stmt;
|
||||
|
||||
/* Create the new pattern statements. */
|
||||
call_stmt = gimple_build_call_internal_vec (this->m_ifn, args);
|
||||
tree var = make_temp_ssa_name (type, call_stmt, "slp_patt");
|
||||
gimple_call_set_lhs (call_stmt, var);
|
||||
gimple_set_location (call_stmt, gimple_location (old_stmt));
|
||||
gimple_call_set_nothrow (call_stmt, true);
|
||||
|
||||
/* Adjust the book-keeping for the new and old statements for use during
|
||||
SLP. This is required to get the right VF and statement during SLP
|
||||
analysis. These changes are created after relevancy has been set for
|
||||
the nodes as such we need to manually update them. Any changes will be
|
||||
undone if SLP is cancelled. */
|
||||
call_stmt_info
|
||||
= vinfo->add_pattern_stmt (call_stmt, stmt_info);
|
||||
|
||||
/* Make sure to mark the representative statement pure_slp and
|
||||
relevant. */
|
||||
STMT_VINFO_RELEVANT (call_stmt_info) = vect_used_in_scope;
|
||||
STMT_SLP_TYPE (call_stmt_info) = pure_slp;
|
||||
|
||||
/* add_pattern_stmt can't be done in vect_mark_pattern_stmts because
|
||||
the non-SLP pattern matchers already have added the statement to VINFO
|
||||
by the time it is called. Some of them need to modify the returned
|
||||
stmt_info. vect_mark_pattern_stmts is called by recog_pattern and it
|
||||
would increase the size of each pattern with boilerplate code to make
|
||||
the call there. */
|
||||
vect_mark_pattern_stmts (vinfo, stmt_info, call_stmt,
|
||||
SLP_TREE_VECTYPE (node));
|
||||
STMT_VINFO_SLP_VECT_ONLY (call_stmt_info) = true;
|
||||
|
||||
/* Since we are replacing all the statements in the group with the same
|
||||
thing it doesn't really matter. So just set it every time a new stmt
|
||||
is created. */
|
||||
SLP_TREE_REPRESENTATIVE (node) = call_stmt_info;
|
||||
SLP_TREE_LANE_PERMUTATION (node).release ();
|
||||
SLP_TREE_CODE (node) = CALL_EXPR;
|
||||
}
|
||||
}
|
||||
|
||||
/*******************************************************************************
|
||||
* complex_add_pattern class
|
||||
******************************************************************************/
|
||||
|
||||
class complex_add_pattern : public complex_pattern
|
||||
{
|
||||
protected:
|
||||
complex_add_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
|
||||
: complex_pattern (node, m_ops, ifn)
|
||||
{
|
||||
this->m_num_args = 2;
|
||||
}
|
||||
|
||||
public:
|
||||
void build (vec_info *);
|
||||
static internal_fn
|
||||
matches (complex_operation_t op, slp_tree_to_load_perm_map_t *,
|
||||
vec<slp_tree> *);
|
||||
|
||||
static vect_pattern*
|
||||
recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
|
||||
};
|
||||
|
||||
/* Perform a replacement of the detected complex add pattern with the new
|
||||
instruction sequences. */
|
||||
|
||||
void
|
||||
complex_add_pattern::build (vec_info *vinfo)
|
||||
{
|
||||
auto_vec<slp_tree> nodes;
|
||||
slp_tree node = this->m_ops[0];
|
||||
vec<slp_tree> children = SLP_TREE_CHILDREN (node);
|
||||
|
||||
/* First re-arrange the children. */
|
||||
nodes.create (children.length ());
|
||||
nodes.quick_push (children[0]);
|
||||
nodes.quick_push (vect_build_swap_evenodd_node (children[1]));
|
||||
|
||||
SLP_TREE_CHILDREN (*this->m_node).truncate (0);
|
||||
SLP_TREE_CHILDREN (*this->m_node).safe_splice (nodes);
|
||||
|
||||
complex_pattern::build (vinfo);
|
||||
}
|
||||
|
||||
/* Pattern matcher for trying to match complex addition pattern in SLP tree.
|
||||
|
||||
If no match is found then IFN is set to IFN_LAST.
|
||||
This function matches the patterns shaped as:
|
||||
|
||||
c[i] = a[i] - b[i+1];
|
||||
c[i+1] = a[i+1] + b[i];
|
||||
|
||||
If a match occurred then TRUE is returned, else FALSE. The initial match is
|
||||
expected to be in OP1 and the initial match operands in args0. */
|
||||
|
||||
internal_fn
|
||||
complex_add_pattern::matches (complex_operation_t op,
|
||||
slp_tree_to_load_perm_map_t *perm_cache,
|
||||
vec<slp_tree> *ops)
|
||||
{
|
||||
internal_fn ifn = IFN_LAST;
|
||||
|
||||
/* Find the two components. Rotation in the complex plane will modify
|
||||
the operations:
|
||||
|
||||
* Rotation 0: + +
|
||||
* Rotation 90: - +
|
||||
* Rotation 180: - -
|
||||
* Rotation 270: + -
|
||||
|
||||
Rotation 0 and 180 can be handled by normal SIMD code, so we don't need
|
||||
to care about them here. */
|
||||
if (op == MINUS_PLUS)
|
||||
ifn = IFN_COMPLEX_ADD_ROT90;
|
||||
else if (op == PLUS_MINUS)
|
||||
ifn = IFN_COMPLEX_ADD_ROT270;
|
||||
else
|
||||
return ifn;
|
||||
|
||||
/* verify that there is a permute, otherwise this isn't a pattern we
|
||||
we support. */
|
||||
gcc_assert (ops->length () == 2);
|
||||
|
||||
vec<slp_tree> children = SLP_TREE_CHILDREN ((*ops)[0]);
|
||||
|
||||
/* First node must be unpermuted. */
|
||||
if (linear_loads_p (perm_cache, children[0]).first != PERM_EVENODD)
|
||||
return IFN_LAST;
|
||||
|
||||
/* Second node must be permuted. */
|
||||
if (linear_loads_p (perm_cache, children[1]).first != PERM_ODDEVEN)
|
||||
return IFN_LAST;
|
||||
|
||||
return ifn;
|
||||
}
|
||||
|
||||
/* Attempt to recognize a complex add pattern. */
|
||||
|
||||
vect_pattern*
|
||||
complex_add_pattern::recognize (slp_tree_to_load_perm_map_t *perm_cache,
|
||||
slp_tree *node)
|
||||
{
|
||||
auto_vec<slp_tree> ops;
|
||||
complex_operation_t op
|
||||
= vect_detect_pair_op (*node, true, &ops);
|
||||
internal_fn ifn = complex_add_pattern::matches (op, perm_cache, &ops);
|
||||
if (!vect_pattern_validate_optab (ifn, *node))
|
||||
return NULL;
|
||||
|
||||
return new complex_add_pattern (node, &ops, ifn);
|
||||
}
|
||||
|
||||
/*******************************************************************************
|
||||
* Pattern matching definitions
|
||||
******************************************************************************/
|
||||
|
||||
#define SLP_PATTERN(x) &x::recognize
|
||||
vect_pattern_decl_t slp_patterns[]
|
||||
{
|
||||
/* For least amount of back-tracking and more efficient matching
|
||||
order patterns from the largest to the smallest. Especially if they
|
||||
overlap in what they can detect. */
|
||||
|
||||
SLP_PATTERN (complex_add_pattern),
|
||||
};
|
||||
#undef SLP_PATTERN
|
||||
|
||||
/* Set the number of SLP pattern matchers available. */
|
||||
size_t num__slp_patterns = sizeof(slp_patterns)/sizeof(vect_pattern_decl_t);
|
@ -133,7 +133,7 @@ _slp_tree::~_slp_tree ()
|
||||
|
||||
/* Recursively free the memory allocated for the SLP tree rooted at NODE. */
|
||||
|
||||
static void
|
||||
void
|
||||
vect_free_slp_tree (slp_tree node)
|
||||
{
|
||||
int i;
|
||||
@ -177,17 +177,26 @@ vect_free_slp_instance (slp_instance instance)
|
||||
/* Create an SLP node for SCALAR_STMTS. */
|
||||
|
||||
slp_tree
|
||||
vect_create_new_slp_node (unsigned nops, tree_code code)
|
||||
{
|
||||
slp_tree node = new _slp_tree;
|
||||
SLP_TREE_SCALAR_STMTS (node) = vNULL;
|
||||
SLP_TREE_CHILDREN (node).create (nops);
|
||||
SLP_TREE_DEF_TYPE (node) = vect_internal_def;
|
||||
SLP_TREE_CODE (node) = code;
|
||||
return node;
|
||||
}
|
||||
/* Create an SLP node for SCALAR_STMTS. */
|
||||
|
||||
static slp_tree
|
||||
vect_create_new_slp_node (slp_tree node,
|
||||
vec<stmt_vec_info> scalar_stmts, unsigned nops)
|
||||
{
|
||||
SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
|
||||
SLP_TREE_CHILDREN (node).create (nops);
|
||||
SLP_TREE_DEF_TYPE (node) = vect_internal_def;
|
||||
if (scalar_stmts.exists ())
|
||||
{
|
||||
SLP_TREE_REPRESENTATIVE (node) = scalar_stmts[0];
|
||||
SLP_TREE_LANES (node) = scalar_stmts.length ();
|
||||
}
|
||||
return node;
|
||||
}
|
||||
|
||||
@ -2219,6 +2228,84 @@ calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
|
||||
return exact_div (common_multiple (nunits, group_size), group_size);
|
||||
}
|
||||
|
||||
/* Helper function of vect_match_slp_patterns.
|
||||
|
||||
Attempts to match patterns against the slp tree rooted in REF_NODE using
|
||||
VINFO. Patterns are matched in post-order traversal.
|
||||
|
||||
If matching is successful the value in REF_NODE is updated and returned, if
|
||||
not then it is returned unchanged. */
|
||||
|
||||
static bool
|
||||
vect_match_slp_patterns_2 (slp_tree *ref_node, vec_info *vinfo,
|
||||
slp_tree_to_load_perm_map_t *perm_cache,
|
||||
hash_set<slp_tree> *visited)
|
||||
{
|
||||
unsigned i;
|
||||
slp_tree node = *ref_node;
|
||||
bool found_p = false;
|
||||
if (!node || visited->add (node))
|
||||
return false;
|
||||
|
||||
slp_tree child;
|
||||
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
|
||||
found_p |= vect_match_slp_patterns_2 (&SLP_TREE_CHILDREN (node)[i],
|
||||
vinfo, perm_cache, visited);
|
||||
|
||||
for (unsigned x = 0; x < num__slp_patterns; x++)
|
||||
{
|
||||
vect_pattern *pattern = slp_patterns[x] (perm_cache, ref_node);
|
||||
if (pattern)
|
||||
{
|
||||
pattern->build (vinfo);
|
||||
delete pattern;
|
||||
found_p = true;
|
||||
}
|
||||
}
|
||||
|
||||
return found_p;
|
||||
}
|
||||
|
||||
/* Applies pattern matching to the given SLP tree rooted in REF_NODE using
|
||||
vec_info VINFO.
|
||||
|
||||
The modified tree is returned. Patterns are tried in order and multiple
|
||||
patterns may match. */
|
||||
|
||||
static bool
|
||||
vect_match_slp_patterns (slp_instance instance, vec_info *vinfo,
|
||||
hash_set<slp_tree> *visited,
|
||||
slp_tree_to_load_perm_map_t *perm_cache,
|
||||
scalar_stmts_to_slp_tree_map_t * /* bst_map */)
|
||||
{
|
||||
DUMP_VECT_SCOPE ("vect_match_slp_patterns");
|
||||
slp_tree *ref_node = &SLP_INSTANCE_TREE (instance);
|
||||
|
||||
if (dump_enabled_p ())
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"Analyzing SLP tree %p for patterns\n",
|
||||
SLP_INSTANCE_TREE (instance));
|
||||
|
||||
bool found_p
|
||||
= vect_match_slp_patterns_2 (ref_node, vinfo, perm_cache, visited);
|
||||
|
||||
if (found_p)
|
||||
{
|
||||
if (dump_enabled_p ())
|
||||
{
|
||||
dump_printf_loc (MSG_NOTE, vect_location,
|
||||
"Pattern matched SLP tree\n");
|
||||
vect_print_slp_graph (MSG_NOTE, vect_location, *ref_node);
|
||||
}
|
||||
}
|
||||
|
||||
return found_p;
|
||||
}
|
||||
|
||||
/* Analyze an SLP instance starting from a group of grouped stores. Call
|
||||
vect_build_slp_tree to build a tree of packed stmts if possible.
|
||||
Return FALSE if it's impossible to SLP any stmt in the loop. */
|
||||
|
||||
static bool
|
||||
vect_analyze_slp_instance (vec_info *vinfo,
|
||||
scalar_stmts_to_slp_tree_map_t *bst_map,
|
||||
@ -2586,6 +2673,7 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
|
||||
{
|
||||
unsigned int i;
|
||||
stmt_vec_info first_element;
|
||||
slp_instance instance;
|
||||
|
||||
DUMP_VECT_SCOPE ("vect_analyze_slp");
|
||||
|
||||
@ -2648,6 +2736,13 @@ vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
|
||||
&limit);
|
||||
}
|
||||
|
||||
hash_set<slp_tree> visited_patterns;
|
||||
slp_tree_to_load_perm_map_t perm_cache;
|
||||
/* See if any patterns can be found in the SLP tree. */
|
||||
FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (vinfo), i, instance)
|
||||
vect_match_slp_patterns (instance, vinfo, &visited_patterns, &perm_cache,
|
||||
bst_map);
|
||||
|
||||
/* The map keeps a reference on SLP nodes built, release that. */
|
||||
for (scalar_stmts_to_slp_tree_map_t::iterator it = bst_map->begin ();
|
||||
it != bst_map->end (); ++it)
|
||||
|
@ -26,6 +26,7 @@ typedef class _stmt_vec_info *stmt_vec_info;
|
||||
#include "tree-data-ref.h"
|
||||
#include "tree-hash-traits.h"
|
||||
#include "target.h"
|
||||
#include "internal-fn.h"
|
||||
|
||||
|
||||
/* Used for naming of new temporaries. */
|
||||
@ -2008,7 +2009,8 @@ extern void duplicate_and_interleave (vec_info *, gimple_seq *, tree,
|
||||
vec<tree>, unsigned int, vec<tree> &);
|
||||
extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info);
|
||||
extern bool vect_update_shared_vectype (stmt_vec_info, tree);
|
||||
extern slp_tree vect_create_new_slp_node (vec<stmt_vec_info>, unsigned);
|
||||
extern slp_tree vect_create_new_slp_node (unsigned, tree_code);
|
||||
extern void vect_free_slp_tree (slp_tree);
|
||||
|
||||
/* In tree-vect-patterns.c. */
|
||||
extern void
|
||||
@ -2025,4 +2027,84 @@ void vect_free_loop_info_assumptions (class loop *);
|
||||
gimple *vect_loop_vectorized_call (class loop *, gcond **cond = NULL);
|
||||
bool vect_stmt_dominates_stmt_p (gimple *, gimple *);
|
||||
|
||||
/* SLP Pattern matcher types, tree-vect-slp-patterns.c. */
|
||||
|
||||
/* Forward declaration of possible two operands operation that can be matched
|
||||
by the complex numbers pattern matchers. */
|
||||
enum _complex_operation : unsigned;
|
||||
|
||||
/* All possible load permute values that could result from the partial data-flow
|
||||
analysis. */
|
||||
typedef enum _complex_perm_kinds {
|
||||
PERM_UNKNOWN,
|
||||
PERM_EVENODD,
|
||||
PERM_ODDEVEN,
|
||||
PERM_ODDODD,
|
||||
PERM_EVENEVEN,
|
||||
/* Can be combined with any other PERM values. */
|
||||
PERM_TOP
|
||||
} complex_perm_kinds_t;
|
||||
|
||||
/* A pair with a load permute and a corresponding complex_perm_kind which gives
|
||||
information about the load it represents. */
|
||||
typedef std::pair<complex_perm_kinds_t, load_permutation_t>
|
||||
complex_load_perm_t;
|
||||
|
||||
/* Cache from nodes to the load permutation they represent. */
|
||||
typedef hash_map <slp_tree, complex_load_perm_t>
|
||||
slp_tree_to_load_perm_map_t;
|
||||
|
||||
/* Vector pattern matcher base class. All SLP pattern matchers must inherit
|
||||
from this type. */
|
||||
|
||||
class vect_pattern
|
||||
{
|
||||
protected:
|
||||
/* The number of arguments that the IFN requires. */
|
||||
unsigned m_num_args;
|
||||
|
||||
/* The internal function that will be used when a pattern is created. */
|
||||
internal_fn m_ifn;
|
||||
|
||||
/* The current node being inspected. */
|
||||
slp_tree *m_node;
|
||||
|
||||
/* The list of operands to be the children for the node produced when the
|
||||
internal function is created. */
|
||||
vec<slp_tree> m_ops;
|
||||
|
||||
/* Default constructor where NODE is the root of the tree to inspect. */
|
||||
vect_pattern (slp_tree *node, vec<slp_tree> *m_ops, internal_fn ifn)
|
||||
{
|
||||
this->m_ifn = ifn;
|
||||
this->m_node = node;
|
||||
this->m_ops.create (0);
|
||||
this->m_ops.safe_splice (*m_ops);
|
||||
}
|
||||
|
||||
public:
|
||||
|
||||
/* Create a new instance of the pattern matcher class of the given type. */
|
||||
static vect_pattern* recognize (slp_tree_to_load_perm_map_t *, slp_tree *);
|
||||
|
||||
/* Build the pattern from the data collected so far. */
|
||||
virtual void build (vec_info *) = 0;
|
||||
|
||||
/* Default destructor. */
|
||||
virtual ~vect_pattern ()
|
||||
{
|
||||
this->m_ops.release ();
|
||||
}
|
||||
};
|
||||
|
||||
/* Function pointer to create a new pattern matcher from a generic type. */
|
||||
typedef vect_pattern* (*vect_pattern_decl_t) (slp_tree_to_load_perm_map_t *,
|
||||
slp_tree *);
|
||||
|
||||
/* List of supported pattern matchers. */
|
||||
extern vect_pattern_decl_t slp_patterns[];
|
||||
|
||||
/* Number of supported pattern matchers. */
|
||||
extern size_t num__slp_patterns;
|
||||
|
||||
#endif /* GCC_TREE_VECTORIZER_H */
|
||||
|
Loading…
Reference in New Issue
Block a user