sse.md (vec_interleave_highv16qi, [...]): Remove expanders.

* config/i386/sse.md (vec_interleave_highv16qi,
	vec_interleave_lowv16qi, vec_interleave_highv8hi,
	vec_interleave_lowv8hi, vec_interleave_highv4si,
	vec_interleave_lowv4si, vec_interleave_highv2di,
	vec_interleave_lowv2di, vec_interleave_highv4sf,
	vec_interleave_lowv4sf, vec_interleave_highv2df,
	vec_interleave_lowv2df): Remove expanders.
	(sse_unpcklps, sse_unpckhps, sse2_unpckhpd_exp, sse2_unpcklpd_exp,
	sse2_punpckhqdq, sse2_punpcklqdq, sse2_punpckhbw, sse2_punpcklbw,
	sse2_punpckhwd, sse2_punpcklwd, sse2_punpckhdq, sse2_punpckldq):
	Rename to their proper vec_interleave equivalent.  Update all users
	to the new name.
	* config/i386/i386.c, config/i386/i386.md: Update all users.

From-SVN: r154711
This commit is contained in:
Richard Henderson 2009-11-27 07:58:50 -08:00 committed by Richard Henderson
parent 315adedaf3
commit b0d49a6e80
4 changed files with 103 additions and 244 deletions

View File

@ -1,3 +1,19 @@
2009-11-27 Richard Henderson <rth@redhat.com>
* config/i386/sse.md (vec_interleave_highv16qi,
vec_interleave_lowv16qi, vec_interleave_highv8hi,
vec_interleave_lowv8hi, vec_interleave_highv4si,
vec_interleave_lowv4si, vec_interleave_highv2di,
vec_interleave_lowv2di, vec_interleave_highv4sf,
vec_interleave_lowv4sf, vec_interleave_highv2df,
vec_interleave_lowv2df): Remove expanders.
(sse_unpcklps, sse_unpckhps, sse2_unpckhpd_exp, sse2_unpcklpd_exp,
sse2_punpckhqdq, sse2_punpcklqdq, sse2_punpckhbw, sse2_punpcklbw,
sse2_punpckhwd, sse2_punpcklwd, sse2_punpckhdq, sse2_punpckldq):
Rename to their proper vec_interleave equivalent. Update all users
to the new name.
* config/i386/i386.c, config/i386/i386.md: Update all users.
2009-11-27 Michael Matz <matz@suse.de>
PR rtl-optimization/42084

View File

@ -13944,7 +13944,7 @@ ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
exponents = validize_mem (force_const_mem (V4SImode, x));
/* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
emit_insn (gen_vec_interleave_lowv4si (int_xmm, int_xmm, exponents));
/* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
@ -13970,7 +13970,7 @@ ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
else
{
x = copy_to_mode_reg (V2DFmode, fp_xmm);
emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
emit_insn (gen_vec_interleave_highv2df (fp_xmm, fp_xmm, fp_xmm));
emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
}
@ -21690,8 +21690,8 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
@ -21799,8 +21799,8 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_SSE2, CODE_FOR_copysignv2df3, "__builtin_ia32_copysignpd", IX86_BUILTIN_CPYSGNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2df, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2df, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
@ -21845,14 +21845,14 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv16qi, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv8hi, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv4si, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_highv2di, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv16qi, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv8hi, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv4si, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_vec_interleave_lowv2di, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
{ OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
@ -26483,6 +26483,7 @@ x86_emit_floatuns (rtx operands[2])
/* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
with all elements equal to VAR. Return true if successful. */
/* ??? Call into the vec_perm support to implement the broadcast. */
static bool
ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
@ -26552,7 +26553,7 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
tmp1 = gen_reg_rtx (V8HImode);
emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
/* Duplicate the low short through the whole low SImode word. */
emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
emit_insn (gen_vec_interleave_lowv8hi (tmp1, tmp1, tmp1));
/* Cast the V8HImode vector back to a V4SImode vector. */
tmp2 = gen_reg_rtx (V4SImode);
emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
@ -26584,8 +26585,8 @@ ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
tmp1 = gen_reg_rtx (V16QImode);
emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
/* Duplicate the low byte through the whole low SImode word. */
emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
emit_insn (gen_vec_interleave_lowv16qi (tmp1, tmp1, tmp1));
emit_insn (gen_vec_interleave_lowv16qi (tmp1, tmp1, tmp1));
/* Cast the V16QImode vector back to a V4SImode vector. */
tmp2 = gen_reg_rtx (V4SImode);
emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
@ -27417,7 +27418,7 @@ ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
/* tmp = target = A B C D */
tmp = copy_to_reg (target);
/* target = A A B B */
emit_insn (gen_sse_unpcklps (target, target, target));
emit_insn (gen_vec_interleave_lowv4sf (target, target, target));
/* target = X A B B */
ix86_expand_vector_set (false, target, val, 0);
/* target = A X C D */
@ -27627,7 +27628,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
case 2:
tmp = gen_reg_rtx (mode);
emit_insn (gen_sse_unpckhps (tmp, vec, vec));
emit_insn (gen_vec_interleave_highv4sf (tmp, vec, vec));
break;
default:
@ -27661,7 +27662,7 @@ ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
case 2:
tmp = gen_reg_rtx (mode);
emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
emit_insn (gen_vec_interleave_highv4si (tmp, vec, vec));
break;
default:
@ -29730,14 +29731,15 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
with interleave. */
t1 = gen_reg_rtx (V8HImode);
t2 = gen_reg_rtx (V8HImode);
emit_insn (gen_sse2_punpckhwd (t1, d->op0, d->op1));
emit_insn (gen_sse2_punpcklwd (d->target, d->op0, d->op1));
emit_insn (gen_sse2_punpckhwd (t2, d->target, t1));
emit_insn (gen_sse2_punpcklwd (d->target, d->target, t1));
emit_insn (gen_vec_interleave_highv8hi (t1, d->op0, d->op1));
emit_insn (gen_vec_interleave_lowv8hi (d->target, d->op0, d->op1));
emit_insn (gen_vec_interleave_highv8hi (t2, d->target, t1));
emit_insn (gen_vec_interleave_lowv8hi (d->target, d->target, t1));
if (odd)
emit_insn (gen_sse2_punpckhwd (d->target, d->target, t2));
t3 = gen_vec_interleave_highv8hi (d->target, d->target, t2);
else
emit_insn (gen_sse2_punpcklwd (d->target, d->target, t2));
t3 = gen_vec_interleave_lowv8hi (d->target, d->target, t2);
emit_insn (t3);
}
break;
@ -29749,16 +29751,17 @@ expand_vec_perm_even_odd_1 (struct expand_vec_perm_d *d, unsigned odd)
t1 = gen_reg_rtx (V16QImode);
t2 = gen_reg_rtx (V16QImode);
t3 = gen_reg_rtx (V16QImode);
emit_insn (gen_sse2_punpckhbw (t1, d->op0, d->op1));
emit_insn (gen_sse2_punpcklbw (d->target, d->op0, d->op1));
emit_insn (gen_sse2_punpckhbw (t2, d->target, t1));
emit_insn (gen_sse2_punpcklbw (d->target, d->target, t1));
emit_insn (gen_sse2_punpckhbw (t3, d->target, t2));
emit_insn (gen_sse2_punpcklbw (d->target, d->target, t2));
emit_insn (gen_vec_interleave_highv16qi (t1, d->op0, d->op1));
emit_insn (gen_vec_interleave_lowv16qi (d->target, d->op0, d->op1));
emit_insn (gen_vec_interleave_highv16qi (t2, d->target, t1));
emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t1));
emit_insn (gen_vec_interleave_highv16qi (t3, d->target, t2));
emit_insn (gen_vec_interleave_lowv16qi (d->target, d->target, t2));
if (odd)
emit_insn (gen_sse2_punpckhbw (d->target, d->target, t3));
t3 = gen_vec_interleave_highv16qi (d->target, d->target, t3);
else
emit_insn (gen_sse2_punpcklbw (d->target, d->target, t3));
t3 = gen_vec_interleave_lowv16qi (d->target, d->target, t3);
emit_insn (t3);
}
break;

View File

@ -4372,7 +4372,8 @@
}
else
operands[3] = simplify_gen_subreg (V4SFmode, operands[1], SFmode, 0);
emit_insn (gen_sse_unpcklps (operands[3], operands[3], operands[3]));
emit_insn (gen_vec_interleave_lowv4sf (operands[3], operands[3],
operands[3]));
}
else
emit_insn (gen_vec_setv4sf_0 (operands[3],
@ -5769,7 +5770,8 @@
gen_rtx_SUBREG (SImode, operands[1], 0)));
emit_insn (gen_sse2_loadld (operands[4], CONST0_RTX (V4SImode),
gen_rtx_SUBREG (SImode, operands[1], 4)));
emit_insn (gen_sse2_punpckldq (operands[3], operands[3], operands[4]));
emit_insn (gen_vec_interleave_lowv4si (operands[3], operands[3],
operands[4]));
operands[3] = gen_rtx_REG (DImode, REGNO (operands[3]));
})

View File

@ -279,12 +279,14 @@
gen_rtx_SUBREG (SImode, operands[1], 0)));
emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
gen_rtx_SUBREG (SImode, operands[1], 4)));
emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2]));
emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
operands[2]));
}
else if (memory_operand (operands[1], DImode))
emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx));
emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
operands[1], const0_rtx));
else
gcc_unreachable ();
gcc_unreachable ();
})
(define_split
@ -3453,9 +3455,9 @@
emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
gen_lowpart (V2DImode, r1),
gen_lowpart (V2DImode, r2)));
emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
gen_lowpart (V2DImode, r1),
gen_lowpart (V2DImode, r2)));
DONE;
})
@ -3472,9 +3474,9 @@
emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]),
gen_lowpart (V2DImode, r1),
gen_lowpart (V2DImode, r2)));
emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
gen_lowpart (V2DImode, r1),
gen_lowpart (V2DImode, r2)));
DONE;
})
@ -3584,6 +3586,7 @@
[(set_attr "type" "ssemov")
(set_attr "mode" "V4SF,V2SF,V2SF")])
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
(define_insn "avx_unpckhps256"
[(set (match_operand:V8SF 0 "register_operand" "=x")
(vec_select:V8SF
@ -3600,7 +3603,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
(define_insn "*avx_unpckhps"
(define_insn "*avx_interleave_highv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_select:V4SF
(vec_concat:V8SF
@ -3614,7 +3617,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V4SF")])
(define_insn "sse_unpckhps"
(define_insn "vec_interleave_highv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_select:V4SF
(vec_concat:V8SF
@ -3627,6 +3630,7 @@
[(set_attr "type" "sselog")
(set_attr "mode" "V4SF")])
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
(define_insn "avx_unpcklps256"
[(set (match_operand:V8SF 0 "register_operand" "=x")
(vec_select:V8SF
@ -3643,7 +3647,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V8SF")])
(define_insn "*avx_unpcklps"
(define_insn "*avx_interleave_lowv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_select:V4SF
(vec_concat:V8SF
@ -3657,7 +3661,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V4SF")])
(define_insn "sse_unpcklps"
(define_insn "vec_interleave_lowv4sf"
[(set (match_operand:V4SF 0 "register_operand" "=x")
(vec_select:V4SF
(vec_concat:V8SF
@ -4464,6 +4468,7 @@
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
(define_insn "avx_unpckhpd256"
[(set (match_operand:V4DF 0 "register_operand" "=x")
(vec_select:V4DF
@ -4478,7 +4483,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
(define_expand "sse2_unpckhpd_exp"
(define_expand "vec_interleave_highv2df"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "")
(vec_select:V2DF
(vec_concat:V4DF
@ -4489,7 +4494,7 @@
"TARGET_SSE2"
"ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
(define_insn "*avx_unpckhpd"
(define_insn "*avx_interleave_highv2df"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
(vec_select:V2DF
(vec_concat:V4DF
@ -4506,7 +4511,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V2DF,V1DF,V1DF")])
(define_insn "sse2_unpckhpd"
(define_insn "*sse2_interleave_highv2df"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
(vec_select:V2DF
(vec_concat:V4DF
@ -4585,6 +4590,7 @@
DONE;
})
;; Recall that the 256-bit unpck insns only shuffle within their lanes.
(define_insn "avx_unpcklpd256"
[(set (match_operand:V4DF 0 "register_operand" "=x")
(vec_select:V4DF
@ -4599,7 +4605,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V4DF")])
(define_expand "sse2_unpcklpd_exp"
(define_expand "vec_interleave_lowv2df"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "")
(vec_select:V2DF
(vec_concat:V4DF
@ -4610,7 +4616,7 @@
"TARGET_SSE2"
"ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);")
(define_insn "*avx_unpcklpd"
(define_insn "*avx_interleave_lowv2df"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
(vec_select:V2DF
(vec_concat:V4DF
@ -4627,7 +4633,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "V2DF,V1DF,V1DF")])
(define_insn "sse2_unpcklpd"
(define_insn "*sse2_interleave_lowv2df"
[(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
(vec_select:V2DF
(vec_concat:V4DF
@ -4721,7 +4727,7 @@
})
;; punpcklqdq and punpckhqdq are shorter than shufpd.
(define_insn "*avx_punpckhqdq"
(define_insn "*avx_interleave_highv2di"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(vec_select:V2DI
(vec_concat:V4DI
@ -4735,7 +4741,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "TI")])
(define_insn "sse2_punpckhqdq"
(define_insn "vec_interleave_highv2di"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(vec_select:V2DI
(vec_concat:V4DI
@ -4749,7 +4755,7 @@
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "*avx_punpcklqdq"
(define_insn "*avx_interleave_lowv2di"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(vec_select:V2DI
(vec_concat:V4DI
@ -4763,7 +4769,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "TI")])
(define_insn "sse2_punpcklqdq"
(define_insn "vec_interleave_lowv2di"
[(set (match_operand:V2DI 0 "register_operand" "=x")
(vec_select:V2DI
(vec_concat:V4DI
@ -5247,10 +5253,10 @@
each word. We don't care what goes into the high byte of each word.
Rather than trying to get zero in there, most convenient is to let
it be a copy of the low byte. */
emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1]));
emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2]));
emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1]));
emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2]));
emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
/* Multiply words. The end-of-line annotations here give a picture of what
the output of that instruction looks like. Dot means don't care; the
@ -5670,7 +5676,7 @@
const0_rtx, const0_rtx));
/* Merge the parts back together. */
emit_insn (gen_sse2_punpckldq (op0, t5, t6));
emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
DONE;
})
@ -6589,174 +6595,6 @@
DONE;
})
;; Expander for the V16QI "interleave high" operation.
;; The template selects, from the V32QI concatenation of operands 1 and 2,
;; indices 8-15 (which fall in operand 1) alternated with indices 24-31
;; (which fall in operand 2), i.e. the upper-indexed halves of both inputs.
;; Enabled under TARGET_SSE2.  The preparation code emits the
;; sse2_punpckhbw pattern on the three operands and ends with DONE.
(define_expand "vec_interleave_highv16qi"
[(set (match_operand:V16QI 0 "register_operand" "")
(vec_select:V16QI
(vec_concat:V32QI
(match_operand:V16QI 1 "register_operand" "")
(match_operand:V16QI 2 "nonimmediate_operand" ""))
(parallel [(const_int 8) (const_int 24)
(const_int 9) (const_int 25)
(const_int 10) (const_int 26)
(const_int 11) (const_int 27)
(const_int 12) (const_int 28)
(const_int 13) (const_int 29)
(const_int 14) (const_int 30)
(const_int 15) (const_int 31)])))]
"TARGET_SSE2"
{
emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
DONE;
})
;; Expander for the V16QI "interleave low" operation.
;; The template selects, from the V32QI concatenation of operands 1 and 2,
;; indices 0-7 (which fall in operand 1) alternated with indices 16-23
;; (which fall in operand 2), i.e. the lower-indexed halves of both inputs.
;; Enabled under TARGET_SSE2.  The preparation code emits the
;; sse2_punpcklbw pattern on the three operands and ends with DONE.
(define_expand "vec_interleave_lowv16qi"
[(set (match_operand:V16QI 0 "register_operand" "")
(vec_select:V16QI
(vec_concat:V32QI
(match_operand:V16QI 1 "register_operand" "")
(match_operand:V16QI 2 "nonimmediate_operand" ""))
(parallel [(const_int 0) (const_int 16)
(const_int 1) (const_int 17)
(const_int 2) (const_int 18)
(const_int 3) (const_int 19)
(const_int 4) (const_int 20)
(const_int 5) (const_int 21)
(const_int 6) (const_int 22)
(const_int 7) (const_int 23)])))]
"TARGET_SSE2"
{
emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
DONE;
})
;; Expander for the V8HI "interleave high" operation.
;; The template selects, from the V16HI concatenation of operands 1 and 2,
;; indices 4-7 (which fall in operand 1) alternated with indices 12-15
;; (which fall in operand 2).
;; Enabled under TARGET_SSE2.  The preparation code emits the
;; sse2_punpckhwd pattern on the three operands and ends with DONE.
;; NOTE(review): operand 0 carries a "=" constraint string here, whereas
;; every other operand in this expander uses "" -- this looks
;; unintentional; confirm before reusing this text.
(define_expand "vec_interleave_highv8hi"
[(set (match_operand:V8HI 0 "register_operand" "=")
(vec_select:V8HI
(vec_concat:V16HI
(match_operand:V8HI 1 "register_operand" "")
(match_operand:V8HI 2 "nonimmediate_operand" ""))
(parallel [(const_int 4) (const_int 12)
(const_int 5) (const_int 13)
(const_int 6) (const_int 14)
(const_int 7) (const_int 15)])))]
"TARGET_SSE2"
{
emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
DONE;
})
;; Expander for the V8HI "interleave low" operation.
;; The template selects, from the V16HI concatenation of operands 1 and 2,
;; indices 0-3 (which fall in operand 1) alternated with indices 8-11
;; (which fall in operand 2).
;; Enabled under TARGET_SSE2.  The preparation code emits the
;; sse2_punpcklwd pattern on the three operands and ends with DONE.
(define_expand "vec_interleave_lowv8hi"
[(set (match_operand:V8HI 0 "register_operand" "")
(vec_select:V8HI
(vec_concat:V16HI
(match_operand:V8HI 1 "register_operand" "")
(match_operand:V8HI 2 "nonimmediate_operand" ""))
(parallel [(const_int 0) (const_int 8)
(const_int 1) (const_int 9)
(const_int 2) (const_int 10)
(const_int 3) (const_int 11)])))]
"TARGET_SSE2"
{
emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
DONE;
})
;; Expander for the V4SI "interleave high" operation.
;; The template selects indices 2, 6, 3, 7 from the V8SI concatenation of
;; operands 1 and 2: indices 2-3 fall in operand 1, indices 6-7 in operand 2.
;; Enabled under TARGET_SSE2.  The preparation code emits the
;; sse2_punpckhdq pattern on the three operands and ends with DONE.
(define_expand "vec_interleave_highv4si"
[(set (match_operand:V4SI 0 "register_operand" "")
(vec_select:V4SI
(vec_concat:V8SI
(match_operand:V4SI 1 "register_operand" "")
(match_operand:V4SI 2 "nonimmediate_operand" ""))
(parallel [(const_int 2) (const_int 6)
(const_int 3) (const_int 7)])))]
"TARGET_SSE2"
{
emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
DONE;
})
;; Expander for the V4SI "interleave low" operation.
;; The template selects indices 0, 4, 1, 5 from the V8SI concatenation of
;; operands 1 and 2: indices 0-1 fall in operand 1, indices 4-5 in operand 2.
;; Enabled under TARGET_SSE2.  The preparation code emits the
;; sse2_punpckldq pattern on the three operands and ends with DONE.
(define_expand "vec_interleave_lowv4si"
[(set (match_operand:V4SI 0 "register_operand" "")
(vec_select:V4SI
(vec_concat:V8SI
(match_operand:V4SI 1 "register_operand" "")
(match_operand:V4SI 2 "nonimmediate_operand" ""))
(parallel [(const_int 0) (const_int 4)
(const_int 1) (const_int 5)])))]
"TARGET_SSE2"
{
emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
DONE;
})
;; Expander for the V2DI "interleave high" operation.
;; The template selects indices 1 and 3 from the V4DI concatenation of
;; operands 1 and 2: index 1 falls in operand 1, index 3 in operand 2.
;; Enabled under TARGET_SSE2.  The preparation code emits the
;; sse2_punpckhqdq pattern on the three operands and ends with DONE.
(define_expand "vec_interleave_highv2di"
[(set (match_operand:V2DI 0 "register_operand" "")
(vec_select:V2DI
(vec_concat:V4DI
(match_operand:V2DI 1 "register_operand" "")
(match_operand:V2DI 2 "nonimmediate_operand" ""))
(parallel [(const_int 1)
(const_int 3)])))]
"TARGET_SSE2"
{
emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
DONE;
})
;; Expander for the V2DI "interleave low" operation.
;; The template selects indices 0 and 2 from the V4DI concatenation of
;; operands 1 and 2: index 0 falls in operand 1, index 2 in operand 2.
;; Enabled under TARGET_SSE2.  The preparation code emits the
;; sse2_punpcklqdq pattern on the three operands and ends with DONE.
(define_expand "vec_interleave_lowv2di"
[(set (match_operand:V2DI 0 "register_operand" "")
(vec_select:V2DI
(vec_concat:V4DI
(match_operand:V2DI 1 "register_operand" "")
(match_operand:V2DI 2 "nonimmediate_operand" ""))
(parallel [(const_int 0)
(const_int 2)])))]
"TARGET_SSE2"
{
emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
DONE;
})
;; Expander for the V4SF "interleave high" operation.
;; The template selects indices 2, 6, 3, 7 from the V8SF concatenation of
;; operands 1 and 2: indices 2-3 fall in operand 1, indices 6-7 in operand 2.
;; Enabled under TARGET_SSE.  No preparation statements are given, so the
;; template alone describes the operation.
(define_expand "vec_interleave_highv4sf"
[(set (match_operand:V4SF 0 "register_operand" "")
(vec_select:V4SF
(vec_concat:V8SF
(match_operand:V4SF 1 "register_operand" "")
(match_operand:V4SF 2 "nonimmediate_operand" ""))
(parallel [(const_int 2) (const_int 6)
(const_int 3) (const_int 7)])))]
"TARGET_SSE")
;; Expander for the V4SF "interleave low" operation.
;; The template selects indices 0, 4, 1, 5 from the V8SF concatenation of
;; operands 1 and 2: indices 0-1 fall in operand 1, indices 4-5 in operand 2.
;; Enabled under TARGET_SSE.  No preparation statements are given, so the
;; template alone describes the operation.
(define_expand "vec_interleave_lowv4sf"
[(set (match_operand:V4SF 0 "register_operand" "")
(vec_select:V4SF
(vec_concat:V8SF
(match_operand:V4SF 1 "register_operand" "")
(match_operand:V4SF 2 "nonimmediate_operand" ""))
(parallel [(const_int 0) (const_int 4)
(const_int 1) (const_int 5)])))]
"TARGET_SSE")
;; Expander for the V2DF "interleave high" operation.
;; The template selects indices 1 and 3 from the V4DF concatenation of
;; operands 1 and 2: index 1 falls in operand 1, index 3 in operand 2.
;; Enabled under TARGET_SSE2.  No preparation statements are given, so the
;; template alone describes the operation.
(define_expand "vec_interleave_highv2df"
[(set (match_operand:V2DF 0 "register_operand" "")
(vec_select:V2DF
(vec_concat:V4DF
(match_operand:V2DF 1 "register_operand" "")
(match_operand:V2DF 2 "nonimmediate_operand" ""))
(parallel [(const_int 1)
(const_int 3)])))]
"TARGET_SSE2")
;; Expander for the V2DF "interleave low" operation.
;; The template selects indices 0 and 2 from the V4DF concatenation of
;; operands 1 and 2: index 0 falls in operand 1, index 2 in operand 2.
;; Enabled under TARGET_SSE2.  No preparation statements are given, so the
;; template alone describes the operation.
(define_expand "vec_interleave_lowv2df"
[(set (match_operand:V2DF 0 "register_operand" "")
(vec_select:V2DF
(vec_concat:V4DF
(match_operand:V2DF 1 "register_operand" "")
(match_operand:V2DF 2 "nonimmediate_operand" ""))
(parallel [(const_int 0)
(const_int 2)])))]
"TARGET_SSE2")
(define_insn "*avx_packsswb"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(vec_concat:V16QI
@ -6835,7 +6673,7 @@
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "*avx_punpckhbw"
(define_insn "*avx_interleave_highv16qi"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(vec_select:V16QI
(vec_concat:V32QI
@ -6855,7 +6693,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "TI")])
(define_insn "sse2_punpckhbw"
(define_insn "vec_interleave_highv16qi"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(vec_select:V16QI
(vec_concat:V32QI
@ -6875,7 +6713,7 @@
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "*avx_punpcklbw"
(define_insn "*avx_interleave_lowv16qi"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(vec_select:V16QI
(vec_concat:V32QI
@ -6895,7 +6733,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "TI")])
(define_insn "sse2_punpcklbw"
(define_insn "vec_interleave_lowv16qi"
[(set (match_operand:V16QI 0 "register_operand" "=x")
(vec_select:V16QI
(vec_concat:V32QI
@ -6915,7 +6753,7 @@
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "*avx_punpckhwd"
(define_insn "*avx_interleave_highv8hi"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(vec_select:V8HI
(vec_concat:V16HI
@ -6931,7 +6769,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "TI")])
(define_insn "sse2_punpckhwd"
(define_insn "vec_interleave_highv8hi"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(vec_select:V8HI
(vec_concat:V16HI
@ -6947,7 +6785,7 @@
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "*avx_punpcklwd"
(define_insn "*avx_interleave_lowv8hi"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(vec_select:V8HI
(vec_concat:V16HI
@ -6963,7 +6801,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "TI")])
(define_insn "sse2_punpcklwd"
(define_insn "vec_interleave_lowv8hi"
[(set (match_operand:V8HI 0 "register_operand" "=x")
(vec_select:V8HI
(vec_concat:V16HI
@ -6979,7 +6817,7 @@
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "*avx_punpckhdq"
(define_insn "*avx_interleave_highv4si"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(vec_select:V4SI
(vec_concat:V8SI
@ -6993,7 +6831,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "TI")])
(define_insn "sse2_punpckhdq"
(define_insn "vec_interleave_highv4si"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(vec_select:V4SI
(vec_concat:V8SI
@ -7007,7 +6845,7 @@
(set_attr "prefix_data16" "1")
(set_attr "mode" "TI")])
(define_insn "*avx_punpckldq"
(define_insn "*avx_interleave_lowv4si"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(vec_select:V4SI
(vec_concat:V8SI
@ -7021,7 +6859,7 @@
(set_attr "prefix" "vex")
(set_attr "mode" "TI")])
(define_insn "sse2_punpckldq"
(define_insn "vec_interleave_lowv4si"
[(set (match_operand:V4SI 0 "register_operand" "=x")
(vec_select:V4SI
(vec_concat:V8SI