AVX Programming Reference (December, 2008)

gcc/

2009-01-06  H.J. Lu  <hongjiu.lu@intel.com>

	AVX Programming Reference (December, 2008)
	* config/i386/avxintrin.h (_mm_permute2_pd): Removed.
	(_mm256_permute2_pd): Likewise.
	(_mm_permute2_ps): Likewise.
	(_mm256_permute2_ps): Likewise.
	* config/i386/i386.md (UNSPEC_VPERMIL2): Likewise.
	* config/i386/sse.md (avx_vpermil2<mode>3): Likewise.

	* config/i386/i386.c (ix86_builtins): Remove
	IX86_BUILTIN_VPERMIL2PD, IX86_BUILTIN_VPERMIL2PS,
	IX86_BUILTIN_VPERMIL2PD256 and IX86_BUILTIN_VPERMIL2PS256.
	(ix86_builtin_type): Remove V8SF_FTYPE_V8SF_V8SF_V8SI_INT,
	V4DF_FTYPE_V4DF_V4DF_V4DI_INT, V4SF_FTYPE_V4SF_V4SF_V4SI_INT
	and V2DF_FTYPE_V2DF_V2DF_V2DI_INT.
	(bdesc_args): Remove __builtin_ia32_vpermil2pd,
	__builtin_ia32_vpermil2ps, __builtin_ia32_vpermil2pd256 and
	__builtin_ia32_vpermil2ps256.
	(ix86_init_mmx_sse_builtins): Updated.
	(ix86_expand_args_builtin): Likewise.

gcc/testsuite/

2009-01-06  H.J. Lu  <hongjiu.lu@intel.com>

	AVX Programming Reference (December, 2008)
	* gcc.target/i386/avx-2.c: Remove tests for _mm_permute2_pd,
	_mm256_permute2_pd, _mm_permute2_ps and _mm256_permute2_ps.
	* gcc.target/i386/sse-14.c: Likewise.

	* gcc.target/i386/avx-vpermil2pd-1.c: Removed.
	* gcc.target/i386/avx-vpermil2ps-1.c: Likewise.
	* gcc.target/i386/avx-vpermil2pd-256-1.c: Likewise.
	* gcc.target/i386/avx-vpermil2ps-256-1.c: Likewise.

From-SVN: r143116
This commit is contained in:
H.J. Lu 2009-01-06 06:21:43 -08:00
parent 44b864717e
commit e47b7d0419
13 changed files with 12966 additions and 13307 deletions

View File

@ -1,7 +1,29 @@
2009-01-06 H.J. Lu <hongjiu.lu@intel.com>
AVX Programming Reference (December, 2008)
* config/i386/avxintrin.h (_mm_permute2_pd): Removed.
(_mm256_permute2_pd): Likewise.
(_mm_permute2_ps): Likewise.
(_mm256_permute2_ps): Likewise.
* config/i386/i386.md (UNSPEC_VPERMIL2): Likewise.
* config/i386/sse.md (avx_vpermil2<mode>3): Likewise.
* config/i386/i386.c (ix86_builtins): Remove
IX86_BUILTIN_VPERMIL2PD, IX86_BUILTIN_VPERMIL2PS,
IX86_BUILTIN_VPERMIL2PD256 and IX86_BUILTIN_VPERMIL2PS256.
(ix86_builtin_type): Remove V8SF_FTYPE_V8SF_V8SF_V8SI_INT,
V4DF_FTYPE_V4DF_V4DF_V4DI_INT, V4SF_FTYPE_V4SF_V4SF_V4SI_INT
and V2DF_FTYPE_V2DF_V2DF_V2DI_INT.
(bdesc_args): Remove __builtin_ia32_vpermil2pd,
__builtin_ia32_vpermil2ps, __builtin_ia32_vpermil2pd256 and
__builtin_ia32_vpermil2ps256.
(ix86_init_mmx_sse_builtins): Updated.
(ix86_expand_args_builtin): Likewise.
2009-01-05 John David Anglin <dave.anglin@nrc-cnrc.gc.ca>
* pa.c (output_call): Relocate non-jump insns in the delay slot of long
absolute calls when generating PA 2.0 code.
* pa.c (output_call): Relocate non-jump insns in the delay slot of
long absolute calls when generating PA 2.0 code.
2009-01-05 Vladimir Makarov <vmakarov@redhat.com>

View File

@ -1,4 +1,4 @@
/* Copyright (C) 2008 Free Software Foundation, Inc.
/* Copyright (C) 2008, 2009 Free Software Foundation, Inc.
This file is part of GCC.
@ -626,42 +626,6 @@ _mm256_permute_ps (__m256 __X, const int __C)
{
return (__m256) __builtin_ia32_vpermilps256 ((__v8sf)__X, __C);
}
/* Forward to __builtin_ia32_vpermil2pd: __X and __Y are passed as __v2df,
   the selector __C as __v2di, and the immediate __I unchanged; the
   builtin's result is cast back to __m128d.  */
extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permute2_pd (__m128d __X, __m128d __Y, __m128i __C, const int __I)
{
return (__m128d) __builtin_ia32_vpermil2pd ((__v2df)__X,
(__v2df)__Y,
(__v2di)__C,
__I);
}
/* Forward to __builtin_ia32_vpermil2pd256: __X and __Y are passed as
   __v4df, the selector __C as __v4di, and the immediate __I unchanged; the
   builtin's result is cast back to __m256d.  */
extern __inline __m256d __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2_pd (__m256d __X, __m256d __Y, __m256i __C, const int __I)
{
return (__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)__X,
(__v4df)__Y,
(__v4di)__C,
__I);
}
/* Forward to __builtin_ia32_vpermil2ps: __X and __Y are passed as __v4sf,
   the selector __C as __v4si, and the immediate __I unchanged; the
   builtin's result is cast back to __m128.  */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_permute2_ps (__m128 __X, __m128 __Y, __m128i __C, const int __I)
{
return (__m128) __builtin_ia32_vpermil2ps ((__v4sf)__X,
(__v4sf)__Y,
(__v4si)__C,
__I);
}
/* Forward to __builtin_ia32_vpermil2ps256: __X and __Y are passed as
   __v8sf, the selector __C as __v8si, and the immediate __I unchanged; the
   builtin's result is cast back to __m256.  */
extern __inline __m256 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I)
{
return (__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)__X,
(__v8sf)__Y,
(__v8si)__C,
__I);
}
#else
#define _mm_permute_pd(X, C) \
((__m128d) __builtin_ia32_vpermilpd ((__v2df)(__m128d)(X), (int)(C)))
@ -674,30 +638,6 @@ _mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I)
#define _mm256_permute_ps(X, C) \
((__m256) __builtin_ia32_vpermilps256 ((__v8sf)(__m256)(X), (int)(C)))
/* Macro counterpart of the inline _mm_permute2_pd.  X and Y are __m128d
   values, C is the selector and I the immediate.  The selector is cast
   through __m128i (not __m128d) so the macro agrees with the inline
   function, whose third parameter is declared __m128i.  */
#define _mm_permute2_pd(X, Y, C, I) \
((__m128d) __builtin_ia32_vpermil2pd ((__v2df)(__m128d)(X), \
(__v2df)(__m128d)(Y), \
(__v2di)(__m128i)(C), \
(int)(I)))
/* Macro counterpart of the inline _mm256_permute2_pd.  X and Y are __m256d
   values, C is the selector and I the immediate.  The selector is cast
   through __m256i (not __m256d) so the macro agrees with the inline
   function, whose third parameter is declared __m256i.  */
#define _mm256_permute2_pd(X, Y, C, I) \
((__m256d) __builtin_ia32_vpermil2pd256 ((__v4df)(__m256d)(X), \
(__v4df)(__m256d)(Y), \
(__v4di)(__m256i)(C), \
(int)(I)))
/* Macro counterpart of the inline _mm_permute2_ps.  X and Y are __m128
   values, C is the selector and I the immediate.  The selector is cast
   through __m128i (not __m128) so the macro agrees with the inline
   function, whose third parameter is declared __m128i.  */
#define _mm_permute2_ps(X, Y, C, I) \
((__m128) __builtin_ia32_vpermil2ps ((__v4sf)(__m128)(X), \
(__v4sf)(__m128)(Y), \
(__v4si)(__m128i)(C), \
(int)(I)))
/* Macro counterpart of the inline _mm256_permute2_ps.  X and Y are __m256
   values, C is the selector and I the immediate.  The selector is cast
   through __m256i (not __m256) so the macro agrees with the inline
   function, whose third parameter is declared __m256i.  */
#define _mm256_permute2_ps(X, Y, C, I) \
((__m256) __builtin_ia32_vpermil2ps256 ((__v8sf)(__m256)(X), \
(__v8sf)(__m256)(Y), \
(__v8si)(__m256i)(C), \
(int)(I)))
#endif
#ifdef __OPTIMIZE__

View File

@ -1,6 +1,6 @@
/* Subroutines used for code generation on IA-32.
Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
2002, 2003, 2004, 2005, 2006, 2007, 2008
2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
Free Software Foundation, Inc.
This file is part of GCC.
@ -19774,10 +19774,6 @@ enum ix86_builtins
IX86_BUILTIN_VPERMILPS,
IX86_BUILTIN_VPERMILPD256,
IX86_BUILTIN_VPERMILPS256,
IX86_BUILTIN_VPERMIL2PD,
IX86_BUILTIN_VPERMIL2PS,
IX86_BUILTIN_VPERMIL2PD256,
IX86_BUILTIN_VPERMIL2PS256,
IX86_BUILTIN_VPERM2F128PD256,
IX86_BUILTIN_VPERM2F128PS256,
IX86_BUILTIN_VPERM2F128SI256,
@ -20434,10 +20430,6 @@ enum ix86_builtin_type
V2DI2TI_FTYPE_V2DI_V2DI_INT,
V1DI2DI_FTYPE_V1DI_V1DI_INT,
V2DF_FTYPE_V2DF_V2DF_INT,
V8SF_FTYPE_V8SF_V8SF_V8SI_INT,
V4DF_FTYPE_V4DF_V4DF_V4DI_INT,
V4SF_FTYPE_V4SF_V4SF_V4SI_INT,
V2DF_FTYPE_V2DF_V2DF_V2DI_INT,
V2DI_FTYPE_V2DI_UINT_UINT,
V2DI_FTYPE_V2DI_V2DI_UINT_UINT
};
@ -21065,10 +21057,6 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SI_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
{ OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
@ -22010,26 +21998,6 @@ ix86_init_mmx_sse_builtins (void)
V4DF_type_node, V4DF_type_node,
integer_type_node,
NULL_TREE);
tree v8sf_ftype_v8sf_v8sf_v8si_int
= build_function_type_list (V8SF_type_node,
V8SF_type_node, V8SF_type_node,
V8SI_type_node, integer_type_node,
NULL_TREE);
tree v4df_ftype_v4df_v4df_v4di_int
= build_function_type_list (V4DF_type_node,
V4DF_type_node, V4DF_type_node,
V4DI_type_node, integer_type_node,
NULL_TREE);
tree v4sf_ftype_v4sf_v4sf_v4si_int
= build_function_type_list (V4SF_type_node,
V4SF_type_node, V4SF_type_node,
V4SI_type_node, integer_type_node,
NULL_TREE);
tree v2df_ftype_v2df_v2df_v2di_int
= build_function_type_list (V2DF_type_node,
V2DF_type_node, V2DF_type_node,
V2DI_type_node, integer_type_node,
NULL_TREE);
tree v8sf_ftype_pcfloat
= build_function_type_list (V8SF_type_node,
pcfloat_type_node,
@ -22733,18 +22701,6 @@ ix86_init_mmx_sse_builtins (void)
case V1DI2DI_FTYPE_V1DI_V1DI_INT:
type = v1di_ftype_v1di_v1di_int;
break;
case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
type = v8sf_ftype_v8sf_v8sf_v8si_int;
break;
case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
type = v4df_ftype_v4df_v4df_v4di_int;
break;
case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
type = v4sf_ftype_v4sf_v4sf_v4si_int;
break;
case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
type = v2df_ftype_v2df_v2df_v2di_int;
break;
default:
gcc_unreachable ();
}
@ -23906,13 +23862,6 @@ ix86_expand_args_builtin (const struct builtin_description *d,
nargs = 3;
nargs_constant = 2;
break;
case V8SF_FTYPE_V8SF_V8SF_V8SI_INT:
case V4DF_FTYPE_V4DF_V4DF_V4DI_INT:
case V4SF_FTYPE_V4SF_V4SF_V4SI_INT:
case V2DF_FTYPE_V2DF_V2DF_V2DI_INT:
nargs = 4;
nargs_constant = 1;
break;
case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
nargs = 4;
nargs_constant = 2;
@ -23982,10 +23931,6 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case CODE_FOR_sse4_1_blendpd:
case CODE_FOR_avx_vpermilv2df:
case CODE_FOR_avx_vpermil2v2df3:
case CODE_FOR_avx_vpermil2v4sf3:
case CODE_FOR_avx_vpermil2v4df3:
case CODE_FOR_avx_vpermil2v8sf3:
error ("the last argument must be a 2-bit immediate");
return const0_rtx;

View File

@ -1,6 +1,6 @@
;; GCC machine description for IA-32 and x86-64.
;; Copyright (C) 1988, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
;; 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
;; 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
;; Free Software Foundation, Inc.
;; Mostly by William Schelter.
;; x86_64 support added by Jan Hubicka
@ -201,12 +201,11 @@
; For AVX support
(UNSPEC_PCMP 166)
(UNSPEC_VPERMIL 167)
(UNSPEC_VPERMIL2 168)
(UNSPEC_VPERMIL2F128 169)
(UNSPEC_MASKLOAD 170)
(UNSPEC_MASKSTORE 171)
(UNSPEC_CAST 172)
(UNSPEC_VTESTP 173)
(UNSPEC_VPERMIL2F128 168)
(UNSPEC_MASKLOAD 169)
(UNSPEC_MASKSTORE 170)
(UNSPEC_CAST 171)
(UNSPEC_VTESTP 172)
])
(define_constants

View File

@ -1,5 +1,5 @@
;; GCC machine description for SSE instructions
;; Copyright (C) 2005, 2006, 2007, 2008
;; Copyright (C) 2005, 2006, 2007, 2008, 2009
;; Free Software Foundation, Inc.
;;
;; This file is part of GCC.
@ -11597,20 +11597,6 @@
(set_attr "prefix" "vex")
(set_attr "mode" "<MODE>")])
;; Insn pattern "avx_vpermil2<mode>3", enabled when TARGET_AVX holds.
;; Operand 0 is the destination register.  Operands 1 and 2 are the
;; AVXMODEF2P sources, operand 3 is the selector in <avxpermvecmode>, and
;; operand 4 is an SI immediate accepted by const_0_to_3_operand.
;; The two constraint alternatives allow either operand 3 (first
;; alternative) or operand 2 (second alternative) to be a memory operand
;; ("xm"), while the other must be a register; operand 1 is always a
;; register.  The result is expressed as an UNSPEC_VPERMIL2 of operands 1-4.
(define_insn "avx_vpermil2<mode>3"
[(set (match_operand:AVXMODEF2P 0 "register_operand" "=x,x")
(unspec:AVXMODEF2P
[(match_operand:AVXMODEF2P 1 "register_operand" "x,x")
(match_operand:AVXMODEF2P 2 "nonimmediate_operand" "x,xm")
(match_operand:<avxpermvecmode> 3 "nonimmediate_operand" "xm,x")
(match_operand:SI 4 "const_0_to_3_operand" "n,n")]
UNSPEC_VPERMIL2))]
"TARGET_AVX"
"vpermil2p<avxmodesuffixf2c>\t{%4, %3, %2, %1, %0|%0, %1, %2, %3, %4}"
[(set_attr "type" "sselog")
(set_attr "prefix" "vex")
(set_attr "mode" "<MODE>")])
(define_insn "avx_vperm2f128<mode>3"
[(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
(unspec:AVX256MODE2P

File diff suppressed because it is too large Load Diff

12921
gcc/testsuite/ChangeLog-2008 Normal file

File diff suppressed because it is too large Load Diff

View File

@ -78,10 +78,6 @@ test_1 (_mm_permute_pd, __m128d, __m128d, 1)
test_1 (_mm256_permute_pd, __m256d, __m256d, 1)
test_1 (_mm_permute_ps, __m128, __m128, 1)
test_1 (_mm256_permute_ps, __m256, __m256, 1)
test_3 (_mm_permute2_pd, __m128d, __m128d, __m128d, __m128d, 1)
test_3 (_mm256_permute2_pd, __m256d, __m256d, __m256d, __m256d, 1)
test_3 (_mm_permute2_ps, __m128, __m128, __m128, __m128, 1)
test_3 (_mm256_permute2_ps, __m256, __m256, __m256, __m256, 1)
test_2 (_mm256_permute2f128_pd, __m256d, __m256d, __m256d, 1)
test_2 (_mm256_permute2f128_ps, __m256, __m256, __m256, 1)
test_2 (_mm256_permute2f128_si256, __m256i, __m256i, __m256i, 1)

View File

@ -1,55 +0,0 @@
/* { dg-do run } */
/* { dg-require-effective-target avx } */
/* { dg-options "-O2 -mavx" } */
#include "avx-check.h"
#ifndef ZERO_MATCH
#define ZERO_MATCH 2
#endif
/* Pick one double from a pair of two-element sources.  Only the low two
   bits of SEL are examined: bit 1 chooses the source (clear -> SRC1,
   set -> SRC2) and bit 0 chooses the element inside that source.  */
static double
select2dp(double *src1, double *src2, long long sel)
{
  const long long field = sel & 0x3;
  const double *chosen = (field & 0x2) ? src2 : src1;

  return chosen[field & 0x1];
}
/* Scalar model of one result element: select a double with the low two
   bits of SEL, then force the value to zero when the low two bits of IMM8
   are 2 and bit 2 of SEL is set, or are 3 and bit 2 of SEL is clear.  */
static double
sel_and_condzerodp(double *src1, double *src2, long long sel, int imm8)
{
  double picked = select2dp(src1, src2, sel & 0x3);
  const int zero_mode = imm8 & 0x3;
  const int match_bit_set = (sel & 0x4) != 0;

  if ((zero_mode == 2 && match_bit_set)
      || (zero_mode == 3 && !match_bit_set))
    picked = 0;

  return picked;
}
/* Run _mm_permute2_pd on fixed inputs, compute the expected elements with
   sel_and_condzerodp, and abort when check_union128d returns nonzero.  */
static void
avx_test ()
{
  union128d lhs, rhs, actual;
  union128i_q selector;
  double expected[2];
  int lane;

  lhs.x = _mm_set_pd (1, 2);
  rhs.x = _mm_set_pd (3, 4);
  selector.x = _mm_set_epi64x (1, 2);

  actual.x = _mm_permute2_pd (lhs.x, rhs.x, selector.x, ZERO_MATCH);

  /* The model is fed bits 3:1 of each selector element.  */
  for (lane = 0; lane < 2; lane++)
    expected[lane] = sel_and_condzerodp (lhs.a, rhs.a,
                                         (selector.a[lane] & 0xe) >> 1,
                                         ZERO_MATCH);

  if (check_union128d (actual, expected))
    abort ();
}

View File

@ -1,57 +0,0 @@
/* { dg-do run } */
/* { dg-require-effective-target avx } */
/* { dg-options "-O2 -mavx" } */
#include "avx-check.h"
#ifndef ZERO_MATCH
#define ZERO_MATCH 1
#endif
/* Pick one double from a pair of two-element sources.  Only the low two
   bits of SEL are examined: bit 1 chooses the source (clear -> SRC1,
   set -> SRC2) and bit 0 chooses the element inside that source.  */
static double
select2dp(double *src1, double *src2, long long sel)
{
  const long long field = sel & 0x3;
  const double *chosen = (field & 0x2) ? src2 : src1;

  return chosen[field & 0x1];
}
/* Scalar model of one result element: select a double via select2dp
   (which looks only at the low two bits of SEL), then force the value to
   zero when the low two bits of IMM8 are 2 and bit 2 of SEL is set, or are
   3 and bit 2 of SEL is clear.  */
static double
sel_and_condzerodp(double *src1, double *src2, long long sel, int imm8)
{
  double picked = select2dp(src1, src2, sel);
  const int zero_mode = imm8 & 0x3;
  const int match_bit_set = (sel & 0x4) != 0;

  if ((zero_mode == 2 && match_bit_set)
      || (zero_mode == 3 && !match_bit_set))
    picked = 0;

  return picked;
}
/* Run _mm256_permute2_pd on fixed inputs, compute the expected elements
   with sel_and_condzerodp, and abort when check_union256d returns
   nonzero.  */
static void
avx_test ()
{
  union256d actual, lhs, rhs;
  double expected[4] = {0.0};
  union256i_q selector;
  int lane;

  lhs.x = _mm256_set_pd (1, 2, 3, 4);
  rhs.x = _mm256_set_pd (5, 6, 7, 8);
  selector.x = _mm256_set_epi64x (0, 1, 2, 3);

  actual.x = _mm256_permute2_pd (lhs.x, rhs.x, selector.x, ZERO_MATCH);

  for (lane = 0; lane < 4; lane++)
    {
      /* Elements 0-1 read from offset 0 of the sources, elements 2-3
         from offset 2.  */
      const int half = lane & 0x2;

      expected[lane] = sel_and_condzerodp (lhs.a + half, rhs.a + half,
                                           (selector.a[lane] & 0xe) >> 1,
                                           ZERO_MATCH);
    }

  if (check_union256d (actual, expected))
    abort ();
}

View File

@ -1,62 +0,0 @@
/* { dg-do run } */
/* { dg-require-effective-target avx } */
/* { dg-options "-O2 -mavx" } */
#include "avx-check.h"
#ifndef ZERO_MATCH
#define ZERO_MATCH 1
#endif
/* Pick one float from a pair of four-element sources.  Only the low three
   bits of SEL are examined: bit 2 chooses the source (clear -> SRC1,
   set -> SRC2) and bits 1:0 choose the element inside that source.  */
static float
select2sp(float *src1, float *src2, int sel)
{
  const int field = sel & 0x7;
  const float *chosen = (field & 0x4) ? src2 : src1;

  return chosen[field & 0x3];
}
/* Scalar model of one result element: select a float with the low three
   bits of SEL, then force the value to zero when the low two bits of IMM8
   are 2 and bit 3 of SEL is set, or are 3 and bit 3 of SEL is clear.  */
static float
sel_and_condzerosp(float *src1, float *src2, int sel, int imm8)
{
  float picked = select2sp(src1, src2, sel & 0x7);
  const int zero_mode = imm8 & 0x3;
  const int match_bit_set = (sel & 0x8) != 0;

  if ((zero_mode == 2 && match_bit_set)
      || (zero_mode == 3 && !match_bit_set))
    picked = 0;

  return picked;
}
/* Run _mm_permute2_ps on fixed inputs, compute the expected elements with
   sel_and_condzerosp, and abort when check_union128 returns nonzero.  */
static void
avx_test ()
{
  float in_a[4] = {1, 2, 3, 4};
  float in_b[4] = {5, 6, 7, 8};
  int sel_words[4] = {0, 1, 0, 1};
  float expected[4];
  union128 vec_a, vec_b, actual;
  union128i_d vec_sel;
  int lane;

  vec_a.x = _mm_loadu_ps (in_a);
  vec_b.x = _mm_loadu_ps (in_b);
  vec_sel.x = _mm_loadu_si128 ((__m128i*) sel_words);

  actual.x = _mm_permute2_ps (vec_a.x, vec_b.x, vec_sel.x, ZERO_MATCH);

  for (lane = 0; lane < 4; lane++)
    {
      /* Offset into the scalar arrays; always 0 for lanes 0-3.  */
      const int base = lane & 0x4;

      expected[lane] = sel_and_condzerosp (in_a + base, in_b + base,
                                           sel_words[lane] & 0xf,
                                           ZERO_MATCH & 0x3);
    }

  if (check_union128 (actual, expected))
    abort ();
}

View File

@ -1,62 +0,0 @@
/* { dg-do run } */
/* { dg-require-effective-target avx } */
/* { dg-options "-O2 -mavx" } */
#include "avx-check.h"
#ifndef ZERO_MATCH
#define ZERO_MATCH 3
#endif
/* Pick one float from a pair of four-element sources.  Only the low three
   bits of SEL are examined: bit 2 chooses the source (clear -> SRC1,
   set -> SRC2) and bits 1:0 choose the element inside that source.  */
static float
select2sp(float *src1, float *src2, int sel)
{
  const int field = sel & 0x7;
  const float *chosen = (field & 0x4) ? src2 : src1;

  return chosen[field & 0x3];
}
/* Scalar model of one result element: select a float with the low three
   bits of SEL, then force the value to zero when the low two bits of IMM8
   are 2 and bit 3 of SEL is set, or are 3 and bit 3 of SEL is clear.  */
static float
sel_and_condzerosp(float *src1, float *src2, int sel, int imm8)
{
  float picked = select2sp(src1, src2, sel & 0x7);
  const int zero_mode = imm8 & 0x3;
  const int match_bit_set = (sel & 0x8) != 0;

  if ((zero_mode == 2 && match_bit_set)
      || (zero_mode == 3 && !match_bit_set))
    picked = 0;

  return picked;
}
/* Run _mm256_permute2_ps on fixed inputs, compute the expected elements
   with sel_and_condzerosp, and abort when check_union256 returns
   nonzero.  */
static void
avx_test ()
{
  float in_a[8] = {1, 2, 3, 4, 5, 6, 7, 8};
  float in_b[8] = {9, 10, 11, 12, 13, 14, 15, 16};
  int sel_words[8] = {11, 2, 3, 15, 5, 12, 7, 8};
  float expected[8];
  union256 vec_a, vec_b, actual;
  union256i_d vec_sel;
  int lane;

  vec_a.x = _mm256_loadu_ps (in_a);
  vec_b.x = _mm256_loadu_ps (in_b);
  vec_sel.x = _mm256_loadu_si256 ((__m256i*) sel_words);

  actual.x = _mm256_permute2_ps (vec_a.x, vec_b.x, vec_sel.x, ZERO_MATCH);

  for (lane = 0; lane < 8; lane++)
    {
      /* Lanes 0-3 read from offset 0 of the scalar arrays, lanes 4-7
         from offset 4.  */
      const int base = lane & 0x4;

      expected[lane] = sel_and_condzerosp (in_a + base, in_b + base,
                                           sel_words[lane] & 0xf,
                                           ZERO_MATCH & 0x3);
    }

  if (check_union256 (actual, expected))
    abort ();
}

View File

@ -74,10 +74,6 @@ test_1 (_mm_permute_pd, __m128d, __m128d, 1)
test_1 (_mm256_permute_pd, __m256d, __m256d, 1)
test_1 (_mm_permute_ps, __m128, __m128, 1)
test_1 (_mm256_permute_ps, __m256, __m256, 1)
test_3 (_mm_permute2_pd, __m128d, __m128d, __m128d, __m128d, 1)
test_3 (_mm256_permute2_pd, __m256d, __m256d, __m256d, __m256d, 1)
test_3 (_mm_permute2_ps, __m128, __m128, __m128, __m128, 1)
test_3 (_mm256_permute2_ps, __m256, __m256, __m256, __m256, 1)
test_2 (_mm256_permute2f128_pd, __m256d, __m256d, __m256d, 1)
test_2 (_mm256_permute2f128_ps, __m256, __m256, __m256, 1)
test_2 (_mm256_permute2f128_si256, __m256i, __m256i, __m256i, 1)