mirror of
https://gcc.gnu.org/git/gcc.git
synced 2024-12-11 12:54:00 +08:00
AVX Programming Reference (December, 2008)
gcc/ 2009-01-06 H.J. Lu <hongjiu.lu@intel.com> AVX Programming Reference (December, 2008) * config/i386/avxintrin.h (_mm_permute2_pd): Removed. (_mm256_permute2_pd): Likewise. (_mm_permute2_ps): Likewise. (_mm256_permute2_ps): Likewise. * config/i386/i386.md (UNSPEC_VPERMIL2): Likewise. * config/i386/sse.md (avx_vpermil2<mode>3): Likewise. * config/i386/i386.c (ix86_builtins): Remove IX86_BUILTIN_VPERMIL2PD, IX86_BUILTIN_VPERMIL2PS, IX86_BUILTIN_VPERMIL2PD256 and IX86_BUILTIN_VPERMIL2PS256. (ix86_builtin_type): Remove V8SF_FTYPE_V8SF_V8SF_V8SI_INT, V4DF_FTYPE_V4DF_V4DF_V4DI_INT, V4SF_FTYPE_V4SF_V4SF_V4SI_INT and V2DF_FTYPE_V2DF_V2DF_V2DI_INT. (bdesc_args): Remove __builtin_ia32_vpermil2pd, __builtin_ia32_vpermil2ps, __builtin_ia32_vpermil2pd256 and __builtin_ia32_vpermil2ps256. (ix86_init_mmx_sse_builtins): Updated. (ix86_expand_args_builtin): Likewise. gcc/testsuite/ 2009-01-06 H.J. Lu <hongjiu.lu@intel.com> AVX Programming Reference (December, 2008) * gcc.target/i386/avx-2.c: Remove tests for _mm_permute2_pd, _mm256_permute2_pd, _mm_permute2_ps and _mm256_permute2_ps. * gcc.target/i386/sse-14.c: Likewise. * gcc.target/i386/avx-vpermil2pd-1.c: Removed. * gcc.target/i386/avx-vpermil2ps-1.c: Likewise. * gcc.target/i386/avx-vpermil2pd-256-1.c: Likewise. * gcc.target/i386/avx-vpermil2ps-256-1.c: Likewise. From-SVN: r143116
This commit is contained in:
parent
44b864717e
commit
e47b7d0419
@ -1,7 +1,29 @@
|
||||
2009-01-06 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
AVX Programming Reference (December, 2008)
|
||||
* config/i386/avxintrin.h (_mm_permute2_pd): Removed.
|
||||
(_mm256_permute2_pd): Likewise.
|
||||
(_mm_permute2_ps): Likewise.
|
||||
(_mm256_permute2_ps): Likewise.
|
||||
* config/i386/i386.md (UNSPEC_VPERMIL2): Likewise.
|
||||
* config/i386/sse.md (avx_vpermil2<mode>3): Likewise.
|
||||
|
||||
* config/i386/i386.c (ix86_builtins): Remove
|
||||
IX86_BUILTIN_VPERMIL2PD, IX86_BUILTIN_VPERMIL2PS,
|
||||
IX86_BUILTIN_VPERMIL2PD256 and IX86_BUILTIN_VPERMIL2PS256.
|
||||
(ix86_builtin_type): Remove V8SF_FTYPE_V8SF_V8SF_V8SI_INT,
|
||||
V4DF_FTYPE_V4DF_V4DF_V4DI_INT, V4SF_FTYPE_V4SF_V4SF_V4SI_INT
|
||||
and V2DF_FTYPE_V2DF_V2DF_V2DI_INT.
|
||||
(bdesc_args): Remove __builtin_ia32_vpermil2pd,
|
||||
__builtin_ia32_vpermil2ps, __builtin_ia32_vpermil2pd256 and
|
||||
__builtin_ia32_vpermil2ps256.
|
||||
(ix86_init_mmx_sse_builtins): Updated.
|
||||
(ix86_expand_args_builtin): Likewise.
|
||||
|
||||
2009-01-05 John David Anglin <dave.anglin@nrc-cnrc.gc.ca>
|
||||
|
||||
* pa.c (output_call): Relocate non-jump insns in the delay slot of long
|
||||
absolute calls when generating PA 2.0 code.
|
||||
* pa.c (output_call): Relocate non-jump insns in the delay slot of
|
||||
long absolute calls when generating PA 2.0 code.
|
||||
|
||||
2009-01-05 Vladimir Makarov <vmakarov@redhat.com>
|
||||
|
||||
|
@ -1,4 +1,4 @@
|
||||
/* Copyright (C) 2008 Free Software Foundation, Inc.
|
||||
/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
@ -626,42 +626,6 @@ _mm256_permute_ps (__m256 __X, const int __C)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vpermilps256 ((__v8sf)__X, __C);
|
||||
}
|
||||
|
||||
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_permute2_pd (__m128d __X, __m128d __Y, __m128i __C, const int __I)
|
||||
{
|
||||
return (__m128d) __builtin_ia32_vpermil2pd ((__v2df)__X,
|
||||
(__v2df)__Y,
|
||||
(__v2di)__C,
|
||||
__I);
|
||||
}
|
||||
|
||||
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_permute2_pd (__m256d __X, __m256d __Y, __m256i __C, const int __I)
|
||||
{
|
||||
return (__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)__X,
|
||||
(__v4df)__Y,
|
||||
(__v4di)__C,
|
||||
__I);
|
||||
}
|
||||
|
||||
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm_permute2_ps (__m128 __X, __m128 __Y, __m128i __C, const int __I)
|
||||
{
|
||||
return (__m128) __builtin_ia32_vpermil2ps ((__v4sf)__X,
|
||||
(__v4sf)__Y,
|
||||
(__v4si)__C,
|
||||
__I);
|
||||
}
|
||||
|
||||
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
|
||||
_mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I)
|
||||
{
|
||||
return (__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)__X,
|
||||
(__v8sf)__Y,
|
||||
(__v8si)__C,
|
||||
__I);
|
||||
}
|
||||
#else
|
||||
#define _mm_permute_pd(X, C) \
|
||||
((__m128d) __builtin_ia32_vpermilpd ((__v2df)(__m128d)(X), (int)(C)))
|
||||
@ -674,30 +638,6 @@ _mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I)
|
||||
|
||||
#define _mm256_permute_ps(X, C) \
|
||||
((__m256) __builtin_ia32_vpermilps256 ((__v8sf)(__m256)(X), (int)(C)))
|
||||
|
||||
#define _mm_permute2_pd(X, Y, C, I) \
|
||||
((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(X), \
|
||||
(__v2df)(__m128d)(Y), \
|
||||
(__v2di)(__m128d)(C), \
|
||||
(int)(I)))
|
||||
|
||||
#define _mm256_permute2_pd(X, Y, C, I) \
|
||||
((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(X), \
|
||||
(__v4df)(__m256d)(Y), \
|
||||
(__v4di)(__m256d)(C), \
|
||||
(int)(I)))
|
||||
|
||||
#define _mm_permute2_ps(X, Y, C, I) \
|
||||
((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(X), \
|
||||
(__v4sf)(__m128)(Y), \
|
||||
(__v4si)(__m128)(C), \
|
||||
(int)(I)))
|
||||
|
||||
#define _mm256_permute2_ps(X, Y, C, I) \
|
||||
((__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)(__m256)(X), \
|
||||
(__v8sf)(__m256)(Y), \
|
||||
(__v8si)(__m256)(C), \
|
||||
(int)(I)))
|
||||
#endif
|
||||
|
||||
#ifdef __OPTIMIZE__
|
||||
|
@ -1,6 +1,6 @@
|
||||
/* Subroutines used for code generation on IA-32.
|
||||
Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
|
||||
2002, 2003, 2004, 2005, 2006, 2007, 2008
|
||||
2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
|
||||
Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
@ -19774,10 +19774,6 @@ enum ix86_builtins
|
||||
IX86_BUILTIN_VPERMILPS,
|
||||
IX86_BUILTIN_VPERMILPD256,
|
||||
IX86_BUILTIN_VPERMILPS256,
|
||||
IX86_BUILTIN_VPERMIL2PD,
|
||||
IX86_BUILTIN_VPERMIL2PS,
|
||||
IX86_BUILTIN_VPERMIL2PD256,
|
||||
IX86_BUILTIN_VPERMIL2PS256,
|
||||
IX86_BUILTIN_VPERM2F128PD256,
|
||||
IX86_BUILTIN_VPERM2F128PS256,
|
||||
IX86_BUILTIN_VPERM2F128SI256,
|
||||
@ -20434,10 +20430,6 @@ enum ix86_builtin_type
|
||||
V2DI2TI_FTYPE_V2DI_V2DI_INT,
|
||||
V1DI2DI_FTYPE_V1DI_V1DI_INT,
|
||||
V2DF_FTYPE_V2DF_V2DF_INT,
|
||||
V8SF_FTYPE_V8SF_V8SF_V8SI_INT,
|
||||
V4DF_FTYPE_V4DF_V4DF_V4DI_INT,
|
||||
V4SF_FTYPE_V4SF_V4SF_V4SI_INT,
|
||||
V2DF_FTYPE_V2DF_V2DF_V2DI_INT,
|
||||
V2DI_FTYPE_V2DI_UINT_UINT,
|
||||
V2DI_FTYPE_V2DI_V2DI_UINT_UINT
|
||||
};
|
||||
@ -21065,10 +21057,6 @@ static const struct builtin_description bdesc_args[] =
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
|
||||
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
|
||||
@ -22010,26 +21998,6 @@ ix86_init_mmx_sse_builtins (void)
|
||||
V4DF_type_node, V4DF_type_node,
|
||||
integer_type_node,
|
||||
NULL_TREE);
|
||||
tree v8sf_ftype_v8sf_v8sf_v8si_int
|
||||
= build_function_type_list (V8SF_type_node,
|
||||
V8SF_type_node, V8SF_type_node,
|
||||
V8SI_type_node, integer_type_node,
|
||||
NULL_TREE);
|
||||
tree v4df_ftype_v4df_v4df_v4di_int
|
||||
= build_function_type_list (V4DF_type_node,
|
||||
V4DF_type_node, V4DF_type_node,
|
||||
V4DI_type_node, integer_type_node,
|
||||
NULL_TREE);
|
||||
tree v4sf_ftype_v4sf_v4sf_v4si_int
|
||||
= build_function_type_list (V4SF_type_node,
|
||||
V4SF_type_node, V4SF_type_node,
|
||||
V4SI_type_node, integer_type_node,
|
||||
NULL_TREE);
|
||||
tree v2df_ftype_v2df_v2df_v2di_int
|
||||
= build_function_type_list (V2DF_type_node,
|
||||
V2DF_type_node, V2DF_type_node,
|
||||
V2DI_type_node, integer_type_node,
|
||||
NULL_TREE);
|
||||
tree v8sf_ftype_pcfloat
|
||||
= build_function_type_list (V8SF_type_node,
|
||||
pcfloat_type_node,
|
||||
@ -22733,18 +22701,6 @@ ix86_init_mmx_sse_builtins (void)
|
||||
case V1DI2DI_FTYPE_V1DI_V1DI_INT:
|
||||
type = v1di_ftype_v1di_v1di_int;
|
||||
break;
|
||||
case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
|
||||
type = v8sf_ftype_v8sf_v8sf_v8si_int;
|
||||
break;
|
||||
case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
|
||||
type = v4df_ftype_v4df_v4df_v4di_int;
|
||||
break;
|
||||
case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
|
||||
type = v4sf_ftype_v4sf_v4sf_v4si_int;
|
||||
break;
|
||||
case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
|
||||
type = v2df_ftype_v2df_v2df_v2di_int;
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
@ -23906,13 +23862,6 @@ ix86_expand_args_builtin (const struct builtin_description *d,
|
||||
nargs = 3;
|
||||
nargs_constant = 2;
|
||||
break;
|
||||
case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
|
||||
case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
|
||||
case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
|
||||
case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
|
||||
nargs = 4;
|
||||
nargs_constant = 1;
|
||||
break;
|
||||
case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
|
||||
nargs = 4;
|
||||
nargs_constant = 2;
|
||||
@ -23982,10 +23931,6 @@ ix86_expand_args_builtin (const struct builtin_description *d,
|
||||
|
||||
case CODE_FOR_sse4_1_blendpd:
|
||||
case CODE_FOR_avx_vpermilv2df:
|
||||
case CODE_FOR_avx_vpermil2v2df3:
|
||||
case CODE_FOR_avx_vpermil2v4sf3:
|
||||
case CODE_FOR_avx_vpermil2v4df3:
|
||||
case CODE_FOR_avx_vpermil2v8sf3:
|
||||
error ("the last argument must be a 2-bit immediate");
|
||||
return const0_rtx;
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
;; GCC machine description for IA-32 and x86-64.
|
||||
;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
|
||||
;; 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
|
||||
;; 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
|
||||
;; Free Software Foundation, Inc.
|
||||
;; Mostly by William Schelter.
|
||||
;; x86_64 support added by Jan Hubicka
|
||||
@ -201,12 +201,11 @@
|
||||
; For AVX support
|
||||
(UNSPEC_PCMP 166)
|
||||
(UNSPEC_VPERMIL 167)
|
||||
(UNSPEC_VPERMIL2 168)
|
||||
(UNSPEC_VPERMIL2F128 169)
|
||||
(UNSPEC_MASKLOAD 170)
|
||||
(UNSPEC_MASKSTORE 171)
|
||||
(UNSPEC_CAST 172)
|
||||
(UNSPEC_VTESTP 173)
|
||||
(UNSPEC_VPERMIL2F128 168)
|
||||
(UNSPEC_MASKLOAD 169)
|
||||
(UNSPEC_MASKSTORE 170)
|
||||
(UNSPEC_CAST 171)
|
||||
(UNSPEC_VTESTP 172)
|
||||
])
|
||||
|
||||
(define_constants
|
||||
|
@ -1,5 +1,5 @@
|
||||
;; GCC machine description for SSE instructions
|
||||
;; Copyright (C) 2005, 2006, 2007, 2008
|
||||
;; Copyright (C) 2005, 2006, 2007, 2008, 2009
|
||||
;; Free Software Foundation, Inc.
|
||||
;;
|
||||
;; This file is part of GCC.
|
||||
@ -11597,20 +11597,6 @@
|
||||
(set_attr "prefix" "vex")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "avx_vpermil2<mode>3"
|
||||
[(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
|
||||
(unspec:AVXMODEF2P
|
||||
[(match_operand:AVXMODEF2P 1 "register_operand" "x,x")
|
||||
(match_operand:AVXMODEF2P 2 "nonimmediate_operand" "x,xm")
|
||||
(match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm,x")
|
||||
(match_operand:SI 4 "const_0_to_3_operand" "n,n")]
|
||||
UNSPEC_VPERMIL2))]
|
||||
"TARGET_AVX"
|
||||
"vpermil2p<avxmodesuffixf2c>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
|
||||
[(set_attr "type" "sselog")
|
||||
(set_attr "prefix" "vex")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "avx_vperm2f128<mode>3"
|
||||
[(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
|
||||
(unspec:AVX256MODE2P
|
||||
|
12934
gcc/testsuite/ChangeLog
12934
gcc/testsuite/ChangeLog
File diff suppressed because it is too large
Load Diff
12921
gcc/testsuite/ChangeLog-2008
Normal file
12921
gcc/testsuite/ChangeLog-2008
Normal file
File diff suppressed because it is too large
Load Diff
@ -78,10 +78,6 @@ test_1 (_mm_permute_pd, __m128d, __m128d, 1)
|
||||
test_1 (_mm256_permute_pd, __m256d, __m256d, 1)
|
||||
test_1 (_mm_permute_ps, __m128, __m128, 1)
|
||||
test_1 (_mm256_permute_ps, __m256, __m256, 1)
|
||||
test_3 (_mm_permute2_pd, __m128d, __m128d, __m128d, __m128d, 1)
|
||||
test_3 (_mm256_permute2_pd, __m256d, __m256d, __m256d, __m256d, 1)
|
||||
test_3 (_mm_permute2_ps, __m128, __m128, __m128, __m128, 1)
|
||||
test_3 (_mm256_permute2_ps, __m256, __m256, __m256, __m256, 1)
|
||||
test_2 (_mm256_permute2f128_pd, __m256d, __m256d, __m256d, 1)
|
||||
test_2 (_mm256_permute2f128_ps, __m256, __m256, __m256, 1)
|
||||
test_2 (_mm256_permute2f128_si256, __m256i, __m256i, __m256i, 1)
|
||||
|
@ -1,55 +0,0 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx } */
|
||||
/* { dg-options "-O2 -mavx" } */
|
||||
|
||||
#include "avx-check.h"
|
||||
|
||||
#ifndef ZERO_MATCH
|
||||
#define ZERO_MATCH 2
|
||||
#endif
|
||||
|
||||
static double
|
||||
select2dp(double *src1, double *src2, long long sel)
|
||||
{
|
||||
double tmp = 0.0;
|
||||
|
||||
if ((sel & 0x3) == 0) tmp = src1[0];
|
||||
if ((sel & 0x3) == 1) tmp = src1[1];
|
||||
if ((sel & 0x3) == 2) tmp = src2[0];
|
||||
if ((sel & 0x3) == 3) tmp = src2[1];
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static double
|
||||
sel_and_condzerodp(double *src1, double *src2, long long sel, int imm8)
|
||||
{
|
||||
double tmp;
|
||||
|
||||
tmp = select2dp(src1, src2, sel & 0x3);
|
||||
|
||||
if (((imm8 & 0x3) == 2) && ((sel & 0x4) == 0x4)) tmp = 0;
|
||||
if (((imm8 & 0x3) == 3) && ((sel & 0x4) == 0x0)) tmp = 0;
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
void static
|
||||
avx_test ()
|
||||
{
|
||||
union128d s1, s2, u;
|
||||
union128i_q s3;
|
||||
double e[2];
|
||||
|
||||
s1.x = _mm_set_pd (1, 2);
|
||||
s2.x = _mm_set_pd (3, 4);
|
||||
s3.x = _mm_set_epi64x (1, 2);
|
||||
u.x = _mm_permute2_pd(s1.x, s2.x, s3.x, ZERO_MATCH);
|
||||
|
||||
e[0] = sel_and_condzerodp (s1.a, s2.a, (s3.a[0] & 0xe)>>1, ZERO_MATCH);
|
||||
e[1] = sel_and_condzerodp (s1.a, s2.a, (s3.a[1] & 0xe)>>1, ZERO_MATCH);
|
||||
|
||||
if (check_union128d (u, e))
|
||||
abort ();
|
||||
}
|
||||
|
@ -1,57 +0,0 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx } */
|
||||
/* { dg-options "-O2 -mavx" } */
|
||||
|
||||
#include "avx-check.h"
|
||||
|
||||
#ifndef ZERO_MATCH
|
||||
#define ZERO_MATCH 1
|
||||
#endif
|
||||
|
||||
static double
|
||||
select2dp(double *src1, double *src2, long long sel)
|
||||
{
|
||||
double tmp = 3.414;
|
||||
|
||||
if ((sel & 0x3) == 0) tmp = src1[0];
|
||||
if ((sel & 0x3) == 1) tmp = src1[1];
|
||||
if ((sel & 0x3) == 2) tmp = src2[0];
|
||||
if ((sel & 0x3) == 3) tmp = src2[1];
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
static double
|
||||
sel_and_condzerodp(double *src1, double *src2, long long sel, int imm8)
|
||||
{
|
||||
double tmp;
|
||||
|
||||
tmp = select2dp(src1, src2, sel);
|
||||
|
||||
if (((imm8 & 0x3) == 2) && ((sel & 0x4) == 0x4)) tmp = 0;
|
||||
if (((imm8 & 0x3) == 3) && ((sel & 0x4) == 0x0)) tmp = 0;
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
void static
|
||||
avx_test ()
|
||||
{
|
||||
union256d u, s1, s2;
|
||||
double e[4] = {0.0};
|
||||
union256i_q s3;
|
||||
|
||||
s1.x = _mm256_set_pd (1, 2, 3, 4);
|
||||
s2.x = _mm256_set_pd (5, 6, 7, 8);
|
||||
s3.x = _mm256_set_epi64x (0, 1, 2, 3);
|
||||
u.x = _mm256_permute2_pd(s1.x, s2.x, s3.x, ZERO_MATCH);
|
||||
|
||||
e[0] = sel_and_condzerodp (s1.a, s2.a, (s3.a[0] & 0xe)>>1, ZERO_MATCH);
|
||||
e[1] = sel_and_condzerodp (s1.a, s2.a, (s3.a[1] & 0xe)>>1, ZERO_MATCH);
|
||||
e[2] = sel_and_condzerodp (s1.a + 2, s2.a + 2, (s3.a[2] & 0xe)>>1, ZERO_MATCH);
|
||||
e[3] = sel_and_condzerodp (s1.a + 2, s2.a + 2, (s3.a[3] & 0xe)>>1, ZERO_MATCH);
|
||||
|
||||
if (check_union256d (u, e))
|
||||
abort ();
|
||||
}
|
||||
|
@ -1,62 +0,0 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx } */
|
||||
/* { dg-options "-O2 -mavx" } */
|
||||
|
||||
#include "avx-check.h"
|
||||
|
||||
#ifndef ZERO_MATCH
|
||||
#define ZERO_MATCH 1
|
||||
#endif
|
||||
|
||||
static float
|
||||
select2sp(float *src1, float *src2, int sel)
|
||||
{
|
||||
float tmp;
|
||||
|
||||
if ((sel & 0x7) == 0) tmp = src1[0];
|
||||
if ((sel & 0x7) == 1) tmp = src1[1];
|
||||
if ((sel & 0x7) == 2) tmp = src1[2];
|
||||
if ((sel & 0x7) == 3) tmp = src1[3];
|
||||
if ((sel & 0x7) == 4) tmp = src2[0];
|
||||
if ((sel & 0x7) == 5) tmp = src2[1];
|
||||
if ((sel & 0x7) == 6) tmp = src2[2];
|
||||
if ((sel & 0x7) == 7) tmp = src2[3];
|
||||
|
||||
return tmp;
|
||||
}
|
||||
static float
|
||||
sel_and_condzerosp(float *src1, float *src2, int sel, int imm8)
|
||||
{
|
||||
float tmp;
|
||||
|
||||
tmp = select2sp(src1, src2, sel & 0x7);
|
||||
|
||||
if (((imm8 & 0x3) == 2) && ((sel & 0x8) == 0x8)) tmp = 0;
|
||||
if (((imm8 & 0x3) == 3) && ((sel & 0x8) == 0x0)) tmp = 0;
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
void static
|
||||
avx_test ()
|
||||
{
|
||||
int i;
|
||||
union128 source1, source2, u;
|
||||
union128i_d source3;
|
||||
float s1[4] = {1, 2, 3, 4};
|
||||
float s2[4] = {5, 6, 7, 8};
|
||||
int s3[4] = {0, 1, 0, 1};
|
||||
float e[4];
|
||||
|
||||
source1.x = _mm_loadu_ps(s1);
|
||||
source2.x = _mm_loadu_ps(s2);
|
||||
source3.x = _mm_loadu_si128((__m128i*) s3);
|
||||
u.x = _mm_permute2_ps(source1.x, source2.x, source3.x, ZERO_MATCH);
|
||||
|
||||
for (i = 0; i < 4; ++i) {
|
||||
e[i] = sel_and_condzerosp(&s1[i & 0x4], &s2[i & 0x4], s3[i] & 0xf, ZERO_MATCH & 0x3);
|
||||
}
|
||||
|
||||
if (check_union128 (u, e))
|
||||
abort ();
|
||||
}
|
@ -1,62 +0,0 @@
|
||||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx } */
|
||||
/* { dg-options "-O2 -mavx" } */
|
||||
|
||||
#include "avx-check.h"
|
||||
|
||||
#ifndef ZERO_MATCH
|
||||
#define ZERO_MATCH 3
|
||||
#endif
|
||||
|
||||
static float
|
||||
select2sp(float *src1, float *src2, int sel)
|
||||
{
|
||||
float tmp;
|
||||
|
||||
if ((sel & 0x7) == 0) tmp = src1[0];
|
||||
if ((sel & 0x7) == 1) tmp = src1[1];
|
||||
if ((sel & 0x7) == 2) tmp = src1[2];
|
||||
if ((sel & 0x7) == 3) tmp = src1[3];
|
||||
if ((sel & 0x7) == 4) tmp = src2[0];
|
||||
if ((sel & 0x7) == 5) tmp = src2[1];
|
||||
if ((sel & 0x7) == 6) tmp = src2[2];
|
||||
if ((sel & 0x7) == 7) tmp = src2[3];
|
||||
|
||||
return tmp;
|
||||
}
|
||||
static float
|
||||
sel_and_condzerosp(float *src1, float *src2, int sel, int imm8)
|
||||
{
|
||||
float tmp;
|
||||
|
||||
tmp = select2sp(src1, src2, sel & 0x7);
|
||||
|
||||
if (((imm8 & 0x3) == 2) && ((sel & 0x8) == 0x8)) tmp = 0;
|
||||
if (((imm8 & 0x3) == 3) && ((sel & 0x8) == 0x0)) tmp = 0;
|
||||
|
||||
return tmp;
|
||||
}
|
||||
|
||||
void static
|
||||
avx_test ()
|
||||
{
|
||||
int i;
|
||||
union256 source1, source2, u;
|
||||
union256i_d source3;
|
||||
float s1[8]={1, 2, 3, 4, 5, 6, 7, 8};
|
||||
float s2[8]={9, 10, 11, 12, 13, 14, 15, 16};
|
||||
int s3[8]={11, 2, 3, 15, 5, 12, 7, 8};
|
||||
float e[8];
|
||||
|
||||
source1.x = _mm256_loadu_ps(s1);
|
||||
source2.x = _mm256_loadu_ps(s2);
|
||||
source3.x = _mm256_loadu_si256((__m256i*) s3);
|
||||
u.x = _mm256_permute2_ps(source1.x, source2.x, source3.x, ZERO_MATCH);
|
||||
|
||||
for (i = 0; i < 8; ++i) {
|
||||
e[i] = sel_and_condzerosp(&s1[i & 0x4], &s2[i & 0x4], s3[i] & 0xf, ZERO_MATCH & 0x3);
|
||||
}
|
||||
|
||||
if (check_union256(u, e))
|
||||
abort ();
|
||||
}
|
@ -74,10 +74,6 @@ test_1 (_mm_permute_pd, __m128d, __m128d, 1)
|
||||
test_1 (_mm256_permute_pd, __m256d, __m256d, 1)
|
||||
test_1 (_mm_permute_ps, __m128, __m128, 1)
|
||||
test_1 (_mm256_permute_ps, __m256, __m256, 1)
|
||||
test_3 (_mm_permute2_pd, __m128d, __m128d, __m128d, __m128d, 1)
|
||||
test_3 (_mm256_permute2_pd, __m256d, __m256d, __m256d, __m256d, 1)
|
||||
test_3 (_mm_permute2_ps, __m128, __m128, __m128, __m128, 1)
|
||||
test_3 (_mm256_permute2_ps, __m256, __m256, __m256, __m256, 1)
|
||||
test_2 (_mm256_permute2f128_pd, __m256d, __m256d, __m256d, 1)
|
||||
test_2 (_mm256_permute2f128_ps, __m256, __m256, __m256, 1)
|
||||
test_2 (_mm256_permute2f128_si256, __m256i, __m256i, __m256i, 1)
|
||||
|
Loading…
Reference in New Issue
Block a user