mirror of
https://gcc.gnu.org/git/gcc.git
synced 2025-01-16 16:54:27 +08:00
[multiple changes]
2006-11-08 Dorit Nuzman <dorit@il.ibm.com> * tree-vect-analyze.c (vect_mark_relevant, vect_stmt_relevant_p): Take enum argument instead of bool. (vect_analyze_operations): Call vectorizable_type_promotion. * tree-vectorizer.h (type_promotion_vec_info_type): New enum stmt_vec_info_type value. (supportable_widening_operation, vectorizable_type_promotion): New function declarations. * tree-vect-transform.c (vect_gen_widened_results_half): New function. (vectorizable_type_promotion): New function. (vect_transform_stmt): Call vectorizable_type_promotion. * tree-vect-analyze.c (supportable_widening_operation): New function. * tree-vect-patterns.c (vect_recog_dot_prod_pattern): Add implementation. * tree-vect-generic.c (expand_vector_operations_1): Consider correct mode. * tree.def (VEC_WIDEN_MULT_HI_EXPR, VEC_WIDEN_MULT_LO_EXPR): (VEC_UNPACK_HI_EXPR, VEC_UNPACK_LO_EXPR): New tree-codes. * tree-inline.c (estimate_num_insns_1): Add cases for above new tree-codes. * tree-pretty-print.c (dump_generic_node, op_prio): Likewise. * expr.c (expand_expr_real_1): Likewise. * optabs.c (optab_for_tree_code): Likewise. (init_optabs): Initialize new optabs. * genopinit.c (vec_widen_umult_hi_optab, vec_widen_smult_hi_optab, vec_widen_smult_hi_optab, vec_widen_smult_lo_optab, vec_unpacks_hi_optab, vec_unpacks_lo_optab, vec_unpacku_hi_optab, vec_unpacku_lo_optab): Initialize new optabs. * optabs.h (OTI_vec_widen_umult_hi, OTI_vec_widen_umult_lo): (OTI_vec_widen_smult_h, OTI_vec_widen_smult_lo, OTI_vec_unpacks_hi, OTI_vec_unpacks_lo, OTI_vec_unpacku_hi, OTI_vec_unpacku_lo): New optab indices. (vec_widen_umult_hi_optab, vec_widen_umult_lo_optab): (vec_widen_smult_hi_optab, vec_widen_smult_lo_optab): (vec_unpacks_hi_optab, vec_unpacku_hi_optab, vec_unpacks_lo_optab): (vec_unpacku_lo_optab): New optabs. * doc/md.texi (vec_unpacks_hi, vec_unpacks_lo, vec_unpacku_hi): (vec_unpacku_lo, vec_widen_umult_hi, vec_widen_umult_lo): (vec_widen_smult_hi, vec_widen_smult_lo): New. 
* doc/c-tree.texi (VEC_LSHIFT_EXPR, VEC_RSHIFT_EXPR): (VEC_WIDEN_MULT_HI_EXPR, VEC_WIDEN_MULT_LO_EXPR, VEC_UNPACK_HI_EXPR): (VEC_UNPACK_LO_EXPR, VEC_PACK_MOD_EXPR, VEC_PACK_SAT_EXPR): New. * config/rs6000/altivec.md (UNSPEC_VMULWHUB, UNSPEC_VMULWLUB): (UNSPEC_VMULWHSB, UNSPEC_VMULWLSB, UNSPEC_VMULWHUH, UNSPEC_VMULWLUH): (UNSPEC_VMULWHSH, UNSPEC_VMULWLSH): New. (UNSPEC_VPERMSI, UNSPEC_VPERMHI): New. (vec_vperm_v8hiv4si, vec_vperm_v16qiv8hi): New patterns used to implement the unsigned unpacking patterns. (vec_unpacks_hi_v16qi, vec_unpacks_hi_v8hi, vec_unpacks_lo_v16qi): (vec_unpacks_lo_v8hi): New signed unpacking patterns. (vec_unpacku_hi_v16qi, vec_unpacku_hi_v8hi, vec_unpacku_lo_v16qi): (vec_unpacku_lo_v8hi): New unsigned unpacking patterns. (vec_widen_umult_hi_v16qi, vec_widen_umult_lo_v16qi): (vec_widen_smult_hi_v16qi, vec_widen_smult_lo_v16qi): (vec_widen_umult_hi_v8hi, vec_widen_umult_lo_v8hi): (vec_widen_smult_hi_v8hi, vec_widen_smult_lo_v8hi): New widening multiplication patterns. * target.h (builtin_mul_widen_even, builtin_mul_widen_odd): New. * target-def.h (TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN): (TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD): New. * config/rs6000/rs6000.c (rs6000_builtin_mul_widen_even): New. (rs6000_builtin_mul_widen_odd): New. (TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN): Defined. (TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD): Defined. * tree-vectorizer.h (enum vect_relevant): New enum type. (_stmt_vec_info): Field relevant chaned from bool to enum vect_relevant. (STMT_VINFO_RELEVANT_P): Updated. (STMT_VINFO_RELEVANT): New. * tree-vectorizer.c (new_stmt_vec_info): Use STMT_VINFO_RELEVANT instead of STMT_VINFO_RELEVANT_P. * tree-vect-analyze.c (vect_mark_relevant, vect_stmt_relevant_p): Replace calls to STMT_VINFO_RELEVANT_P with STMT_VINFO_RELEVANT, and boolean variable with enum vect_relevant. (vect_mark_stmts_to_be_vectorized): Likewise + update documentation. * doc/tm.texi (TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN): New. 
(TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD): New. 2006-11-08 Richard Henderson <rth@redhat.com> * config/i386/sse.md (vec_widen_umult_hi_v8hi, vec_widen_umult_lo_v8hi): New. (vec_widen_smult_hi_v4si, vec_widen_smult_lo_v4si, vec_widen_umult_hi_v4si, vec_widen_umult_lo_v4si): New. * config/i386/i386.c (ix86_expand_sse_unpack): New. * config/i386/i386-protos.h (ix86_expand_sse_unpack): New. * config/i386/sse.md (vec_unpacku_hi_v16qi, vec_unpacks_hi_v16qi, vec_unpacku_lo_v16qi, vec_unpacks_lo_v16qi, vec_unpacku_hi_v8hi, vec_unpacks_hi_v8hi, vec_unpacku_lo_v8hi, vec_unpacks_lo_v8hi, vec_unpacku_hi_v4si, vec_unpacks_hi_v4si, vec_unpacku_lo_v4si, vec_unpacks_lo_v4si): New. 2006-11-08 Dorit Nuzman <dorit@il.ibm.com> * tree-vect-transform.c (vectorizable_type_demotion): New function. (vect_transform_stmt): Add case for type_demotion_vec_info_type. (vect_analyze_operations): Call vectorizable_type_demotion. * tree-vectorizer.h (type_demotion_vec_info_type): New enum stmt_vec_info_type value. (vectorizable_type_demotion): New function declaration. * tree-vect-generic.c (expand_vector_operations_1): Consider correct mode. * tree.def (VEC_PACK_MOD_EXPR, VEC_PACK_SAT_EXPR): New tree-codes. * expr.c (expand_expr_real_1): Add case for VEC_PACK_MOD_EXPR and VEC_PACK_SAT_EXPR. * tree-iniline.c (estimate_num_insns_1): Likewise. * tree-pretty-print.c (dump_generic_node, op_prio): Likewise. * optabs.c (optab_for_tree_code): Likewise. * optabs.c (expand_binop): In case of vec_pack_*_optabs the mode compared against the predicate of the result is not 'mode' (the input to the function) but a mode with half the size of 'mode'. (init_optab): Initialize new optabs. * optabs.h (OTI_vec_pack_mod, OTI_vec_pack_ssat, OTI_vec_pack_usat): New optab indices. (vec_pack_mod_optab, vec_pack_ssat_optab, vec_pack_usat_optab): New optabs. * genopinit.c (vec_pack_mod_optab, vec_pack_ssat_optab): (vec_pack_usat_optab): Initialize new optabs. * doc/md.texi (vec_pack_mod, vec_pack_ssat, vec_pack_usat): New. 
* config/rs6000/altivec.md (vec_pack_mod_v8hi, vec_pack_mod_v4si): New. 2006-11-08 Richard Henderson <rth@redehat.com> * config/i386/sse.md (vec_pack_mod_v8hi, vec_pack_mod_v4si): (vec_pack_mod_v2di, vec_interleave_highv16qi, vec_interleave_lowv16qi): (vec_interleave_highv8hi, vec_interleave_lowv8hi): (vec_interleave_highv4si, vec_interleave_lowv4si): (vec_interleave_highv2di, vec_interleave_lowv2di): New. 2006-11-08 Dorit Nuzman <dorit@il.ibm.com> * tree-vect-transform.c (vectorizable_reduction): Support multiple datatypes. (vect_transform_stmt): Removed redundant code. 2006-11-08 Dorit Nuzman <dorit@il.ibm.com> * tree-vect-transform.c (vectorizable_operation): Support multiple datatypes. 2006-11-08 Dorit Nuzman <dorit@il.ibm.com> * tree-vect-transform.c (vect_align_data_ref): Removed. (vect_create_data_ref_ptr): Added additional argument - ptr_incr. Updated function documentation. Return the increment stmt in ptr_incr. (bump_vector_ptr): New function. (vect_get_vec_def_for_stmt_copy): New function. (vect_finish_stmt_generation): Create a stmt_info to newly created vector stmts. (vect_setup_realignment): Call vect_create_data_ref_ptr with additional argument. (vectorizable_reduction, vectorizable_assignment): Not supported yet if VF is greater than the number of elements that can fit in one vector word. (vectorizable_operation, vectorizable_condition): Likewise. (vectorizable_store, vectorizable_load): Support the case that the VF is greater than the number of elements that can fit in one vector word. (vect_transform_loop): Don't fail in case of multiple data-types. * tree-vect-analyze.c (vect_determine_vectorization_factor): Don't fail in case of multiple data-types; the smallest type determines the VF. (vect_analyze_data_ref_dependence): Don't record datarefs as same_align if they are of different sizes. (vect_update_misalignment_for_peel): Compare misalignments in terms of number of elements rather than number of bytes. 
(vect_enhance_data_refs_alignment): Fix/Add dump printouts. (vect_can_advance_ivs_p): Fix a dump printout From-SVN: r118577
This commit is contained in:
parent
6300f037d0
commit
89d67ccabb
174
gcc/ChangeLog
174
gcc/ChangeLog
@ -1,3 +1,177 @@
|
||||
2006-11-08 Dorit Nuzman <dorit@il.ibm.com>
|
||||
|
||||
* tree-vect-analyze.c (vect_mark_relevant, vect_stmt_relevant_p): Take
|
||||
enum argument instead of bool.
|
||||
(vect_analyze_operations): Call vectorizable_type_promotion.
|
||||
* tree-vectorizer.h (type_promotion_vec_info_type): New enum
|
||||
stmt_vec_info_type value.
|
||||
(supportable_widening_operation, vectorizable_type_promotion): New
|
||||
function declarations.
|
||||
* tree-vect-transform.c (vect_gen_widened_results_half): New function.
|
||||
(vectorizable_type_promotion): New function.
|
||||
(vect_transform_stmt): Call vectorizable_type_promotion.
|
||||
* tree-vect-analyze.c (supportable_widening_operation): New function.
|
||||
* tree-vect-patterns.c (vect_recog_dot_prod_pattern):
|
||||
Add implementation.
|
||||
* tree-vect-generic.c (expand_vector_operations_1): Consider correct
|
||||
mode.
|
||||
|
||||
* tree.def (VEC_WIDEN_MULT_HI_EXPR, VEC_WIDEN_MULT_LO_EXPR):
|
||||
(VEC_UNPACK_HI_EXPR, VEC_UNPACK_LO_EXPR): New tree-codes.
|
||||
* tree-inline.c (estimate_num_insns_1): Add cases for above new
|
||||
tree-codes.
|
||||
* tree-pretty-print.c (dump_generic_node, op_prio): Likewise.
|
||||
* expr.c (expand_expr_real_1): Likewise.
|
||||
* optabs.c (optab_for_tree_code): Likewise.
|
||||
(init_optabs): Initialize new optabs.
|
||||
* genopinit.c (vec_widen_umult_hi_optab, vec_widen_umult_lo_optab,
|
||||
vec_widen_smult_hi_optab, vec_widen_smult_lo_optab,
|
||||
vec_unpacks_hi_optab, vec_unpacks_lo_optab, vec_unpacku_hi_optab,
|
||||
vec_unpacku_lo_optab): Initialize new optabs.
|
||||
* optabs.h (OTI_vec_widen_umult_hi, OTI_vec_widen_umult_lo):
|
||||
(OTI_vec_widen_smult_hi, OTI_vec_widen_smult_lo, OTI_vec_unpacks_hi,
|
||||
OTI_vec_unpacks_lo, OTI_vec_unpacku_hi, OTI_vec_unpacku_lo): New
|
||||
optab indices.
|
||||
(vec_widen_umult_hi_optab, vec_widen_umult_lo_optab):
|
||||
(vec_widen_smult_hi_optab, vec_widen_smult_lo_optab):
|
||||
(vec_unpacks_hi_optab, vec_unpacku_hi_optab, vec_unpacks_lo_optab):
|
||||
(vec_unpacku_lo_optab): New optabs.
|
||||
* doc/md.texi (vec_unpacks_hi, vec_unpacks_lo, vec_unpacku_hi):
|
||||
(vec_unpacku_lo, vec_widen_umult_hi, vec_widen_umult_lo):
|
||||
(vec_widen_smult_hi, vec_widen_smult_lo): New.
|
||||
* doc/c-tree.texi (VEC_LSHIFT_EXPR, VEC_RSHIFT_EXPR):
|
||||
(VEC_WIDEN_MULT_HI_EXPR, VEC_WIDEN_MULT_LO_EXPR, VEC_UNPACK_HI_EXPR):
|
||||
(VEC_UNPACK_LO_EXPR, VEC_PACK_MOD_EXPR, VEC_PACK_SAT_EXPR): New.
|
||||
|
||||
* config/rs6000/altivec.md (UNSPEC_VMULWHUB, UNSPEC_VMULWLUB):
|
||||
(UNSPEC_VMULWHSB, UNSPEC_VMULWLSB, UNSPEC_VMULWHUH, UNSPEC_VMULWLUH):
|
||||
(UNSPEC_VMULWHSH, UNSPEC_VMULWLSH): New.
|
||||
(UNSPEC_VPERMSI, UNSPEC_VPERMHI): New.
|
||||
(vec_vperm_v8hiv4si, vec_vperm_v16qiv8hi): New patterns used to
|
||||
implement the unsigned unpacking patterns.
|
||||
(vec_unpacks_hi_v16qi, vec_unpacks_hi_v8hi, vec_unpacks_lo_v16qi):
|
||||
(vec_unpacks_lo_v8hi): New signed unpacking patterns.
|
||||
(vec_unpacku_hi_v16qi, vec_unpacku_hi_v8hi, vec_unpacku_lo_v16qi):
|
||||
(vec_unpacku_lo_v8hi): New unsigned unpacking patterns.
|
||||
(vec_widen_umult_hi_v16qi, vec_widen_umult_lo_v16qi):
|
||||
(vec_widen_smult_hi_v16qi, vec_widen_smult_lo_v16qi):
|
||||
(vec_widen_umult_hi_v8hi, vec_widen_umult_lo_v8hi):
|
||||
(vec_widen_smult_hi_v8hi, vec_widen_smult_lo_v8hi): New widening
|
||||
multiplication patterns.
|
||||
|
||||
* target.h (builtin_mul_widen_even, builtin_mul_widen_odd): New.
|
||||
* target-def.h (TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN):
|
||||
(TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD): New.
|
||||
* config/rs6000/rs6000.c (rs6000_builtin_mul_widen_even): New.
|
||||
(rs6000_builtin_mul_widen_odd): New.
|
||||
(TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN): Defined.
|
||||
(TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD): Defined.
|
||||
* tree-vectorizer.h (enum vect_relevant): New enum type.
|
||||
(_stmt_vec_info): Field relevant changed from bool to enum
|
||||
vect_relevant.
|
||||
(STMT_VINFO_RELEVANT_P): Updated.
|
||||
(STMT_VINFO_RELEVANT): New.
|
||||
* tree-vectorizer.c (new_stmt_vec_info): Use STMT_VINFO_RELEVANT
|
||||
instead of STMT_VINFO_RELEVANT_P.
|
||||
* tree-vect-analyze.c (vect_mark_relevant, vect_stmt_relevant_p):
|
||||
Replace calls to STMT_VINFO_RELEVANT_P with STMT_VINFO_RELEVANT,
|
||||
and boolean variable with enum vect_relevant.
|
||||
(vect_mark_stmts_to_be_vectorized): Likewise + update documentation.
|
||||
* doc/tm.texi (TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN): New.
|
||||
(TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD): New.
|
||||
|
||||
2006-11-08 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* config/i386/sse.md (vec_widen_umult_hi_v8hi,
|
||||
vec_widen_umult_lo_v8hi): New.
|
||||
(vec_widen_smult_hi_v4si, vec_widen_smult_lo_v4si,
|
||||
vec_widen_umult_hi_v4si, vec_widen_umult_lo_v4si): New.
|
||||
|
||||
* config/i386/i386.c (ix86_expand_sse_unpack): New.
|
||||
* config/i386/i386-protos.h (ix86_expand_sse_unpack): New.
|
||||
* config/i386/sse.md (vec_unpacku_hi_v16qi, vec_unpacks_hi_v16qi,
|
||||
vec_unpacku_lo_v16qi, vec_unpacks_lo_v16qi, vec_unpacku_hi_v8hi,
|
||||
vec_unpacks_hi_v8hi, vec_unpacku_lo_v8hi, vec_unpacks_lo_v8hi,
|
||||
vec_unpacku_hi_v4si, vec_unpacks_hi_v4si, vec_unpacku_lo_v4si,
|
||||
vec_unpacks_lo_v4si): New.
|
||||
|
||||
2006-11-08 Dorit Nuzman <dorit@il.ibm.com>
|
||||
|
||||
* tree-vect-transform.c (vectorizable_type_demotion): New function.
|
||||
(vect_transform_stmt): Add case for type_demotion_vec_info_type.
|
||||
* tree-vect-analyze.c (vect_analyze_operations): Call vectorizable_type_demotion.
|
||||
* tree-vectorizer.h (type_demotion_vec_info_type): New enum
|
||||
stmt_vec_info_type value.
|
||||
(vectorizable_type_demotion): New function declaration.
|
||||
* tree-vect-generic.c (expand_vector_operations_1): Consider correct
|
||||
mode.
|
||||
|
||||
* tree.def (VEC_PACK_MOD_EXPR, VEC_PACK_SAT_EXPR): New tree-codes.
|
||||
* expr.c (expand_expr_real_1): Add case for VEC_PACK_MOD_EXPR and
|
||||
VEC_PACK_SAT_EXPR.
|
||||
* tree-inline.c (estimate_num_insns_1): Likewise.
|
||||
* tree-pretty-print.c (dump_generic_node, op_prio): Likewise.
|
||||
* optabs.c (optab_for_tree_code): Likewise.
|
||||
|
||||
* optabs.c (expand_binop): In case of vec_pack_*_optabs the mode
|
||||
compared against the predicate of the result is not 'mode' (the input
|
||||
to the function) but a mode with half the size of 'mode'.
|
||||
(init_optabs): Initialize new optabs.
|
||||
* optabs.h (OTI_vec_pack_mod, OTI_vec_pack_ssat, OTI_vec_pack_usat):
|
||||
New optab indices.
|
||||
(vec_pack_mod_optab, vec_pack_ssat_optab, vec_pack_usat_optab): New
|
||||
optabs.
|
||||
* genopinit.c (vec_pack_mod_optab, vec_pack_ssat_optab):
|
||||
(vec_pack_usat_optab): Initialize new optabs.
|
||||
* doc/md.texi (vec_pack_mod, vec_pack_ssat, vec_pack_usat): New.
|
||||
* config/rs6000/altivec.md (vec_pack_mod_v8hi, vec_pack_mod_v4si): New.
|
||||
|
||||
2006-11-08 Richard Henderson <rth@redhat.com>
|
||||
|
||||
* config/i386/sse.md (vec_pack_mod_v8hi, vec_pack_mod_v4si):
|
||||
(vec_pack_mod_v2di, vec_interleave_highv16qi, vec_interleave_lowv16qi):
|
||||
(vec_interleave_highv8hi, vec_interleave_lowv8hi):
|
||||
(vec_interleave_highv4si, vec_interleave_lowv4si):
|
||||
(vec_interleave_highv2di, vec_interleave_lowv2di): New.
|
||||
|
||||
2006-11-08 Dorit Nuzman <dorit@il.ibm.com>
|
||||
|
||||
* tree-vect-transform.c (vectorizable_reduction): Support multiple
|
||||
datatypes.
|
||||
(vect_transform_stmt): Removed redundant code.
|
||||
|
||||
2006-11-08 Dorit Nuzman <dorit@il.ibm.com>
|
||||
|
||||
* tree-vect-transform.c (vectorizable_operation): Support multiple
|
||||
datatypes.
|
||||
|
||||
2006-11-08 Dorit Nuzman <dorit@il.ibm.com>
|
||||
|
||||
* tree-vect-transform.c (vect_align_data_ref): Removed.
|
||||
(vect_create_data_ref_ptr): Added additional argument - ptr_incr.
|
||||
Updated function documentation. Return the increment stmt in ptr_incr.
|
||||
(bump_vector_ptr): New function.
|
||||
(vect_get_vec_def_for_stmt_copy): New function.
|
||||
(vect_finish_stmt_generation): Create a stmt_info to newly created
|
||||
vector stmts.
|
||||
(vect_setup_realignment): Call vect_create_data_ref_ptr with additional
|
||||
argument.
|
||||
(vectorizable_reduction, vectorizable_assignment): Not supported yet if
|
||||
VF is greater than the number of elements that can fit in one vector
|
||||
word.
|
||||
(vectorizable_operation, vectorizable_condition): Likewise.
|
||||
(vectorizable_store, vectorizable_load): Support the case that the VF
|
||||
is greater than the number of elements that can fit in one vector word.
|
||||
(vect_transform_loop): Don't fail in case of multiple data-types.
|
||||
* tree-vect-analyze.c (vect_determine_vectorization_factor): Don't fail
|
||||
in case of multiple data-types; the smallest type determines the VF.
|
||||
(vect_analyze_data_ref_dependence): Don't record datarefs as same_align
|
||||
if they are of different sizes.
|
||||
(vect_update_misalignment_for_peel): Compare misalignments in terms of
|
||||
number of elements rather than number of bytes.
|
||||
(vect_enhance_data_refs_alignment): Fix/Add dump printouts.
|
||||
(vect_can_advance_ivs_p): Fix a dump printout.
|
||||
|
||||
2006-11-07 Eric Christopher <echristo@apple.com>
|
||||
|
||||
* libgcc2.c (__bswapdi2): Rename from bswapDI2.
|
||||
|
@ -105,6 +105,7 @@ extern int ix86_expand_int_movcc (rtx[]);
|
||||
extern int ix86_expand_fp_movcc (rtx[]);
|
||||
extern bool ix86_expand_fp_vcond (rtx[]);
|
||||
extern bool ix86_expand_int_vcond (rtx[]);
|
||||
extern void ix86_expand_sse_unpack (rtx[], bool, bool);
|
||||
extern int ix86_expand_int_addcc (rtx[]);
|
||||
extern void ix86_expand_call (rtx, rtx, rtx, rtx, rtx, int);
|
||||
extern void x86_initialize_trampoline (rtx, rtx, rtx);
|
||||
|
@ -11733,6 +11733,52 @@ ix86_expand_int_vcond (rtx operands[])
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
|
||||
true if we should do zero extension, else sign extension. HIGH_P is
|
||||
true if we want the N/2 high elements, else the low elements. */
|
||||
|
||||
void
|
||||
ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
|
||||
{
|
||||
enum machine_mode imode = GET_MODE (operands[1]);
|
||||
rtx (*unpack)(rtx, rtx, rtx);
|
||||
rtx se, dest;
|
||||
|
||||
switch (imode)
|
||||
{
|
||||
case V16QImode:
|
||||
if (high_p)
|
||||
unpack = gen_vec_interleave_highv16qi;
|
||||
else
|
||||
unpack = gen_vec_interleave_lowv16qi;
|
||||
break;
|
||||
case V8HImode:
|
||||
if (high_p)
|
||||
unpack = gen_vec_interleave_highv8hi;
|
||||
else
|
||||
unpack = gen_vec_interleave_lowv8hi;
|
||||
break;
|
||||
case V4SImode:
|
||||
if (high_p)
|
||||
unpack = gen_vec_interleave_highv4si;
|
||||
else
|
||||
unpack = gen_vec_interleave_lowv4si;
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
dest = gen_lowpart (imode, operands[0]);
|
||||
|
||||
if (unsigned_p)
|
||||
se = force_reg (imode, CONST0_RTX (imode));
|
||||
else
|
||||
se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
|
||||
operands[1], pc_rtx, pc_rtx);
|
||||
|
||||
emit_insn (unpack (dest, operands[1], se));
|
||||
}
|
||||
|
||||
/* Expand conditional increment or decrement using adc/sbb instructions.
|
||||
The default case using setcc followed by the conditional move can be
|
||||
done by generic code. */
|
||||
@ -14863,7 +14909,7 @@ static const struct builtin_description bdesc_2arg[] =
|
||||
{ MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
|
||||
|
||||
{ MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
|
||||
{ MASK_SSE2, CODE_FOR_sse2_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
|
||||
{ MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
|
||||
|
||||
{ MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
|
||||
{ MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
|
||||
@ -14898,7 +14944,7 @@ static const struct builtin_description bdesc_2arg[] =
|
||||
{ MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
|
||||
{ MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
|
||||
|
||||
{ MASK_SSE2, CODE_FOR_sse2_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
|
||||
{ MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
|
||||
{ MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
|
||||
|
||||
{ MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
|
||||
|
@ -2620,7 +2620,20 @@
|
||||
[(set_attr "type" "sseimul")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_insn "sse2_smulv8hi3_highpart"
|
||||
;; Named expander for the signed V8HI high-part multiply.  This must be a
|
||||
;; define_expand: its final string is C preparation code, not an assembler
|
||||
;; template, and the matching insn is "*smulv8hi3_highpart" below.
|
||||
(define_expand "smulv8hi3_highpart"
|
||||
[(set (match_operand:V8HI 0 "register_operand" "")
|
||||
(truncate:V8HI
|
||||
(lshiftrt:V8SI
|
||||
(mult:V8SI
|
||||
(sign_extend:V8SI
|
||||
(match_operand:V8HI 1 "nonimmediate_operand" ""))
|
||||
(sign_extend:V8SI
|
||||
(match_operand:V8HI 2 "nonimmediate_operand" "")))
|
||||
(const_int 16))))]
|
||||
"TARGET_SSE2"
|
||||
"ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
|
||||
|
||||
(define_insn "*smulv8hi3_highpart"
|
||||
[(set (match_operand:V8HI 0 "register_operand" "=x")
|
||||
(truncate:V8HI
|
||||
(lshiftrt:V8SI
|
||||
@ -2635,7 +2648,20 @@
|
||||
[(set_attr "type" "sseimul")
|
||||
(set_attr "mode" "TI")])
|
||||
|
||||
(define_insn "sse2_umulv8hi3_highpart"
|
||||
;; Named expander for the unsigned V8HI high-part multiply.  This must be a
|
||||
;; define_expand: its final string is C preparation code, not an assembler
|
||||
;; template, and the matching insn is "*umulv8hi3_highpart" below.
|
||||
(define_expand "umulv8hi3_highpart"
|
||||
[(set (match_operand:V8HI 0 "register_operand" "")
|
||||
(truncate:V8HI
|
||||
(lshiftrt:V8SI
|
||||
(mult:V8SI
|
||||
(zero_extend:V8SI
|
||||
(match_operand:V8HI 1 "nonimmediate_operand" ""))
|
||||
(zero_extend:V8SI
|
||||
(match_operand:V8HI 2 "nonimmediate_operand" "")))
|
||||
(const_int 16))))]
|
||||
"TARGET_SSE2"
|
||||
"ix86_fixup_binary_operands_no_copy (MULT, V8HImode, operands);")
|
||||
|
||||
(define_insn "*umulv8hi3_highpart"
|
||||
[(set (match_operand:V8HI 0 "register_operand" "=x")
|
||||
(truncate:V8HI
|
||||
(lshiftrt:V8SI
|
||||
@ -2792,6 +2818,122 @@
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_widen_umult_hi_v8hi"
|
||||
[(match_operand:V4SI 0 "register_operand" "")
|
||||
(match_operand:V8HI 1 "register_operand" "")
|
||||
(match_operand:V8HI 2 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx op1, op2, t1, t2, dest;
|
||||
|
||||
op1 = operands[1];
|
||||
op2 = operands[2];
|
||||
t1 = gen_reg_rtx (V8HImode);
|
||||
t2 = gen_reg_rtx (V8HImode);
|
||||
dest = gen_lowpart (V8HImode, operands[0]);
|
||||
|
||||
emit_insn (gen_mulv8hi3 (t1, op1, op2));
|
||||
emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
|
||||
emit_insn (gen_vec_interleave_highv8hi (dest, t1, t2));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_widen_umult_lo_v8hi"
|
||||
[(match_operand:V4SI 0 "register_operand" "")
|
||||
(match_operand:V8HI 1 "register_operand" "")
|
||||
(match_operand:V8HI 2 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx op1, op2, t1, t2, dest;
|
||||
|
||||
op1 = operands[1];
|
||||
op2 = operands[2];
|
||||
t1 = gen_reg_rtx (V8HImode);
|
||||
t2 = gen_reg_rtx (V8HImode);
|
||||
dest = gen_lowpart (V8HImode, operands[0]);
|
||||
|
||||
emit_insn (gen_mulv8hi3 (t1, op1, op2));
|
||||
emit_insn (gen_umulv8hi3_highpart (t2, op1, op2));
|
||||
emit_insn (gen_vec_interleave_lowv8hi (dest, t1, t2));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Expands to two vec_interleave_highv4si self-interleaves (op1 with op1,
|
||||
;; op2 with op2) followed by gen_sse2_umulv2siv2di3 into operand 0.
|
||||
;; NOTE(review): this signed expander emits the same sequence as
|
||||
;; vec_widen_umult_hi_v4si, including the unsigned multiply generator;
|
||||
;; confirm the result is correct for negative elements.
|
||||
(define_expand "vec_widen_smult_hi_v4si"
|
||||
[(match_operand:V2DI 0 "register_operand" "")
|
||||
(match_operand:V4SI 1 "register_operand" "")
|
||||
(match_operand:V4SI 2 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx op1, op2, t1, t2;
|
||||
|
||||
op1 = operands[1];
|
||||
op2 = operands[2];
|
||||
t1 = gen_reg_rtx (V4SImode);
|
||||
t2 = gen_reg_rtx (V4SImode);
|
||||
|
||||
emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
|
||||
emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
|
||||
emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Expands to two vec_interleave_lowv4si self-interleaves (op1 with op1,
|
||||
;; op2 with op2) followed by gen_sse2_umulv2siv2di3 into operand 0.
|
||||
;; NOTE(review): this signed expander emits the same sequence as
|
||||
;; vec_widen_umult_lo_v4si, including the unsigned multiply generator;
|
||||
;; confirm the result is correct for negative elements.
|
||||
(define_expand "vec_widen_smult_lo_v4si"
|
||||
[(match_operand:V2DI 0 "register_operand" "")
|
||||
(match_operand:V4SI 1 "register_operand" "")
|
||||
(match_operand:V4SI 2 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx op1, op2, t1, t2;
|
||||
|
||||
op1 = operands[1];
|
||||
op2 = operands[2];
|
||||
t1 = gen_reg_rtx (V4SImode);
|
||||
t2 = gen_reg_rtx (V4SImode);
|
||||
|
||||
emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
|
||||
emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
|
||||
emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_widen_umult_hi_v4si"
|
||||
[(match_operand:V2DI 0 "register_operand" "")
|
||||
(match_operand:V4SI 1 "register_operand" "")
|
||||
(match_operand:V4SI 2 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx op1, op2, t1, t2;
|
||||
|
||||
op1 = operands[1];
|
||||
op2 = operands[2];
|
||||
t1 = gen_reg_rtx (V4SImode);
|
||||
t2 = gen_reg_rtx (V4SImode);
|
||||
|
||||
emit_insn (gen_vec_interleave_highv4si (t1, op1, op1));
|
||||
emit_insn (gen_vec_interleave_highv4si (t2, op2, op2));
|
||||
emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_widen_umult_lo_v4si"
|
||||
[(match_operand:V2DI 0 "register_operand" "")
|
||||
(match_operand:V4SI 1 "register_operand" "")
|
||||
(match_operand:V4SI 2 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx op1, op2, t1, t2;
|
||||
|
||||
op1 = operands[1];
|
||||
op2 = operands[2];
|
||||
t1 = gen_reg_rtx (V4SImode);
|
||||
t2 = gen_reg_rtx (V4SImode);
|
||||
|
||||
emit_insn (gen_vec_interleave_lowv4si (t1, op1, op1));
|
||||
emit_insn (gen_vec_interleave_lowv4si (t2, op2, op2));
|
||||
emit_insn (gen_sse2_umulv2siv2di3 (operands[0], t1, t2));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "sdot_prodv8hi"
|
||||
[(match_operand:V4SI 0 "register_operand" "")
|
||||
(match_operand:V8HI 1 "nonimmediate_operand" "")
|
||||
@ -3215,6 +3357,227 @@
|
||||
;;
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Reduce:
|
||||
;; op1 = abcdefghijklmnop
|
||||
;; op2 = qrstuvwxyz012345
|
||||
;; h1 = aqbrcsdteufvgwhx
|
||||
;; l1 = iyjzk0l1m2n3o4p5
|
||||
;; h2 = aiqybjrzcks0dlt1
|
||||
;; l2 = emu2fnv3gow4hpx5
|
||||
;; h3 = aeimquy2bfjnrvz3
|
||||
;; l3 = cgkosw04dhlptx15
|
||||
;; result = bdfhjlnprtvxz135
|
||||
(define_expand "vec_pack_mod_v8hi"
|
||||
[(match_operand:V16QI 0 "register_operand" "")
|
||||
(match_operand:V8HI 1 "register_operand" "")
|
||||
(match_operand:V8HI 2 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx op1, op2, h1, l1, h2, l2, h3, l3;
|
||||
|
||||
op1 = gen_lowpart (V16QImode, operands[1]);
|
||||
op2 = gen_lowpart (V16QImode, operands[2]);
|
||||
h1 = gen_reg_rtx (V16QImode);
|
||||
l1 = gen_reg_rtx (V16QImode);
|
||||
h2 = gen_reg_rtx (V16QImode);
|
||||
l2 = gen_reg_rtx (V16QImode);
|
||||
h3 = gen_reg_rtx (V16QImode);
|
||||
l3 = gen_reg_rtx (V16QImode);
|
||||
|
||||
emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
|
||||
emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
|
||||
emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
|
||||
emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
|
||||
emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
|
||||
emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
|
||||
emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Reduce:
|
||||
;; op1 = abcdefgh
|
||||
;; op2 = ijklmnop
|
||||
;; h1 = aibjckdl
|
||||
;; l1 = emfngohp
|
||||
;; h2 = aeimbfjn
|
||||
;; l2 = cgkodhlp
|
||||
;; result = bdfhjlnp
|
||||
(define_expand "vec_pack_mod_v4si"
|
||||
[(match_operand:V8HI 0 "register_operand" "")
|
||||
(match_operand:V4SI 1 "register_operand" "")
|
||||
(match_operand:V4SI 2 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx op1, op2, h1, l1, h2, l2;
|
||||
|
||||
op1 = gen_lowpart (V8HImode, operands[1]);
|
||||
op2 = gen_lowpart (V8HImode, operands[2]);
|
||||
h1 = gen_reg_rtx (V8HImode);
|
||||
l1 = gen_reg_rtx (V8HImode);
|
||||
h2 = gen_reg_rtx (V8HImode);
|
||||
l2 = gen_reg_rtx (V8HImode);
|
||||
|
||||
emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
|
||||
emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
|
||||
emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
|
||||
emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
|
||||
emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
|
||||
DONE;
|
||||
})
|
||||
|
||||
;; Reduce:
|
||||
;; op1 = abcd
|
||||
;; op2 = efgh
|
||||
;; h1 = aebf
|
||||
;; l1 = cgdh
|
||||
;; result = bdfh
|
||||
(define_expand "vec_pack_mod_v2di"
|
||||
[(match_operand:V4SI 0 "register_operand" "")
|
||||
(match_operand:V2DI 1 "register_operand" "")
|
||||
(match_operand:V2DI 2 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
rtx op1, op2, h1, l1;
|
||||
|
||||
op1 = gen_lowpart (V4SImode, operands[1]);
|
||||
op2 = gen_lowpart (V4SImode, operands[2]);
|
||||
h1 = gen_reg_rtx (V4SImode);
|
||||
l1 = gen_reg_rtx (V4SImode);
|
||||
|
||||
emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
|
||||
emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
|
||||
emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_interleave_highv16qi"
|
||||
[(set (match_operand:V16QI 0 "register_operand" "=x")
|
||||
(vec_select:V16QI
|
||||
(vec_concat:V32QI
|
||||
(match_operand:V16QI 1 "register_operand" "0")
|
||||
(match_operand:V16QI 2 "nonimmediate_operand" "xm"))
|
||||
(parallel [(const_int 8) (const_int 24)
|
||||
(const_int 9) (const_int 25)
|
||||
(const_int 10) (const_int 26)
|
||||
(const_int 11) (const_int 27)
|
||||
(const_int 12) (const_int 28)
|
||||
(const_int 13) (const_int 29)
|
||||
(const_int 14) (const_int 30)
|
||||
(const_int 15) (const_int 31)])))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_interleave_lowv16qi"
|
||||
[(set (match_operand:V16QI 0 "register_operand" "=x")
|
||||
(vec_select:V16QI
|
||||
(vec_concat:V32QI
|
||||
(match_operand:V16QI 1 "register_operand" "0")
|
||||
(match_operand:V16QI 2 "nonimmediate_operand" "xm"))
|
||||
(parallel [(const_int 0) (const_int 16)
|
||||
(const_int 1) (const_int 17)
|
||||
(const_int 2) (const_int 18)
|
||||
(const_int 3) (const_int 19)
|
||||
(const_int 4) (const_int 20)
|
||||
(const_int 5) (const_int 21)
|
||||
(const_int 6) (const_int 22)
|
||||
(const_int 7) (const_int 23)])))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_interleave_highv8hi"
|
||||
[(set (match_operand:V8HI 0 "register_operand" "=x")
|
||||
(vec_select:V8HI
|
||||
(vec_concat:V16HI
|
||||
(match_operand:V8HI 1 "register_operand" "0")
|
||||
(match_operand:V8HI 2 "nonimmediate_operand" "xm"))
|
||||
(parallel [(const_int 4) (const_int 12)
|
||||
(const_int 5) (const_int 13)
|
||||
(const_int 6) (const_int 14)
|
||||
(const_int 7) (const_int 15)])))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_interleave_lowv8hi"
|
||||
[(set (match_operand:V8HI 0 "register_operand" "=x")
|
||||
(vec_select:V8HI
|
||||
(vec_concat:V16HI
|
||||
(match_operand:V8HI 1 "register_operand" "0")
|
||||
(match_operand:V8HI 2 "nonimmediate_operand" "xm"))
|
||||
(parallel [(const_int 0) (const_int 8)
|
||||
(const_int 1) (const_int 9)
|
||||
(const_int 2) (const_int 10)
|
||||
(const_int 3) (const_int 11)])))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_interleave_highv4si"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=x")
|
||||
(vec_select:V4SI
|
||||
(vec_concat:V8SI
|
||||
(match_operand:V4SI 1 "register_operand" "0")
|
||||
(match_operand:V4SI 2 "nonimmediate_operand" "xm"))
|
||||
(parallel [(const_int 2) (const_int 6)
|
||||
(const_int 3) (const_int 7)])))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_interleave_lowv4si"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=x")
|
||||
(vec_select:V4SI
|
||||
(vec_concat:V8SI
|
||||
(match_operand:V4SI 1 "register_operand" "0")
|
||||
(match_operand:V4SI 2 "nonimmediate_operand" "xm"))
|
||||
(parallel [(const_int 0) (const_int 4)
|
||||
(const_int 1) (const_int 5)])))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_interleave_highv2di"
|
||||
[(set (match_operand:V2DI 0 "register_operand" "=x")
|
||||
(vec_select:V2DI
|
||||
(vec_concat:V4DI
|
||||
(match_operand:V2DI 1 "register_operand" "0")
|
||||
(match_operand:V2DI 2 "nonimmediate_operand" "xm"))
|
||||
(parallel [(const_int 1)
|
||||
(const_int 3)])))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_interleave_lowv2di"
|
||||
[(set (match_operand:V2DI 0 "register_operand" "=x")
|
||||
(vec_select:V2DI
|
||||
(vec_concat:V4DI
|
||||
(match_operand:V2DI 1 "register_operand" "0")
|
||||
(match_operand:V2DI 2 "nonimmediate_operand" "xm"))
|
||||
(parallel [(const_int 0)
|
||||
(const_int 2)])))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_insn "sse2_packsswb"
|
||||
[(set (match_operand:V16QI 0 "register_operand" "=x")
|
||||
(vec_concat:V16QI
|
||||
@ -3832,6 +4195,114 @@
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_unpacku_hi_v16qi"
|
||||
[(match_operand:V8HI 0 "register_operand" "")
|
||||
(match_operand:V16QI 1 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
ix86_expand_sse_unpack (operands, true, true);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_unpacks_hi_v16qi"
|
||||
[(match_operand:V8HI 0 "register_operand" "")
|
||||
(match_operand:V16QI 1 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
ix86_expand_sse_unpack (operands, false, true);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_unpacku_lo_v16qi"
|
||||
[(match_operand:V8HI 0 "register_operand" "")
|
||||
(match_operand:V16QI 1 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
ix86_expand_sse_unpack (operands, true, false);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_unpacks_lo_v16qi"
|
||||
[(match_operand:V8HI 0 "register_operand" "")
|
||||
(match_operand:V16QI 1 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
ix86_expand_sse_unpack (operands, false, false);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_unpacku_hi_v8hi"
|
||||
[(match_operand:V4SI 0 "register_operand" "")
|
||||
(match_operand:V8HI 1 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
ix86_expand_sse_unpack (operands, true, true);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_unpacks_hi_v8hi"
|
||||
[(match_operand:V4SI 0 "register_operand" "")
|
||||
(match_operand:V8HI 1 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
ix86_expand_sse_unpack (operands, false, true);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_unpacku_lo_v8hi"
|
||||
[(match_operand:V4SI 0 "register_operand" "")
|
||||
(match_operand:V8HI 1 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
ix86_expand_sse_unpack (operands, true, false);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_unpacks_lo_v8hi"
|
||||
[(match_operand:V4SI 0 "register_operand" "")
|
||||
(match_operand:V8HI 1 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
ix86_expand_sse_unpack (operands, false, false);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_unpacku_hi_v4si"
|
||||
[(match_operand:V2DI 0 "register_operand" "")
|
||||
(match_operand:V4SI 1 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
ix86_expand_sse_unpack (operands, true, true);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_unpacks_hi_v4si"
|
||||
[(match_operand:V2DI 0 "register_operand" "")
|
||||
(match_operand:V4SI 1 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
ix86_expand_sse_unpack (operands, false, true);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_unpacku_lo_v4si"
|
||||
[(match_operand:V2DI 0 "register_operand" "")
|
||||
(match_operand:V4SI 1 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
ix86_expand_sse_unpack (operands, true, false);
|
||||
DONE;
|
||||
})
|
||||
|
||||
(define_expand "vec_unpacks_lo_v4si"
|
||||
[(match_operand:V2DI 0 "register_operand" "")
|
||||
(match_operand:V4SI 1 "register_operand" "")]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
ix86_expand_sse_unpack (operands, false, false);
|
||||
DONE;
|
||||
})
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;
|
||||
;; Miscellaneous
|
||||
|
@ -122,6 +122,20 @@
|
||||
(UNSPEC_VCONDU_V4SI 305)
|
||||
(UNSPEC_VCONDU_V8HI 306)
|
||||
(UNSPEC_VCONDU_V16QI 307)
|
||||
(UNSPEC_VMULWHUB 308)
|
||||
(UNSPEC_VMULWLUB 309)
|
||||
(UNSPEC_VMULWHSB 310)
|
||||
(UNSPEC_VMULWLSB 311)
|
||||
(UNSPEC_VMULWHUH 312)
|
||||
(UNSPEC_VMULWLUH 313)
|
||||
(UNSPEC_VMULWHSH 314)
|
||||
(UNSPEC_VMULWLSH 315)
|
||||
(UNSPEC_VUPKHUB 316)
|
||||
(UNSPEC_VUPKHUH 317)
|
||||
(UNSPEC_VUPKLUB 318)
|
||||
(UNSPEC_VUPKLUH 319)
|
||||
(UNSPEC_VPERMSI 320)
|
||||
(UNSPEC_VPERMHI 321)
|
||||
])
|
||||
|
||||
(define_constants
|
||||
@ -2203,6 +2217,371 @@
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_expand "vec_unpacks_hi_v16qi"
|
||||
[(set (match_operand:V8HI 0 "register_operand" "=v")
|
||||
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
|
||||
UNSPEC_VUPKHSB))]
|
||||
"TARGET_ALTIVEC"
|
||||
"
|
||||
{
|
||||
emit_insn (gen_altivec_vupkhsb (operands[0], operands[1]));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_expand "vec_unpacks_hi_v8hi"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
|
||||
UNSPEC_VUPKHSH))]
|
||||
"TARGET_ALTIVEC"
|
||||
"
|
||||
{
|
||||
emit_insn (gen_altivec_vupkhsh (operands[0], operands[1]));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_expand "vec_unpacks_lo_v16qi"
|
||||
[(set (match_operand:V8HI 0 "register_operand" "=v")
|
||||
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
|
||||
UNSPEC_VUPKLSB))]
|
||||
"TARGET_ALTIVEC"
|
||||
"
|
||||
{
|
||||
emit_insn (gen_altivec_vupklsb (operands[0], operands[1]));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_expand "vec_unpacks_lo_v8hi"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
|
||||
UNSPEC_VUPKLSH))]
|
||||
"TARGET_ALTIVEC"
|
||||
"
|
||||
{
|
||||
emit_insn (gen_altivec_vupklsh (operands[0], operands[1]));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_insn "vperm_v8hiv4si"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
|
||||
(match_operand:V4SI 2 "register_operand" "v")
|
||||
(match_operand:V16QI 3 "register_operand" "v")]
|
||||
UNSPEC_VPERMSI))]
|
||||
"TARGET_ALTIVEC"
|
||||
"vperm %0,%1,%2,%3"
|
||||
[(set_attr "type" "vecperm")])
|
||||
|
||||
(define_insn "vperm_v16qiv8hi"
|
||||
[(set (match_operand:V8HI 0 "register_operand" "=v")
|
||||
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
|
||||
(match_operand:V8HI 2 "register_operand" "v")
|
||||
(match_operand:V16QI 3 "register_operand" "v")]
|
||||
UNSPEC_VPERMHI))]
|
||||
"TARGET_ALTIVEC"
|
||||
"vperm %0,%1,%2,%3"
|
||||
[(set_attr "type" "vecperm")])
|
||||
|
||||
|
||||
;; Unsigned unpack of the high half of a V16QI vector into V8HI.  The
;; expansion permutes the input together with an all-zero vector so that
;; each result halfword is a zero byte followed by one input byte.
(define_expand "vec_unpacku_hi_v16qi"
  [(set (match_operand:V8HI 0 "register_operand" "=v")
        (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
                     UNSPEC_VUPKHUB))]
  "TARGET_ALTIVEC"
  "
{
  rtx zero_vec = gen_reg_rtx (V8HImode);
  rtx perm_mask = gen_reg_rtx (V16QImode);
  rtvec sel = rtvec_alloc (16);
  int i;

  emit_insn (gen_altivec_vspltish (zero_vec, const0_rtx));

  /* Build the selector 16,0, 16,1, ... 16,7.  NOTE(review): under the
     usual vperm byte numbering, index 16 is byte 0 of the second permute
     input (the zero vector) and 0..7 are the first eight bytes of
     operand 1 -- confirm against the AltiVec manual.  */
  for (i = 0; i < 8; i++)
    {
      RTVEC_ELT (sel, 2 * i) = gen_rtx_CONST_INT (QImode, 16);
      RTVEC_ELT (sel, 2 * i + 1) = gen_rtx_CONST_INT (QImode, i);
    }

  emit_insn (gen_vec_initv16qi (perm_mask,
                                gen_rtx_PARALLEL (V16QImode, sel)));
  emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1],
                                  zero_vec, perm_mask));
  DONE;
}")
|
||||
|
||||
;; Unsigned unpack of the high half of a V8HI vector into V4SI.  The
;; expansion permutes the input together with an all-zero vector so that
;; each result word is two zero bytes followed by one input halfword.
(define_expand "vec_unpacku_hi_v8hi"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
                     UNSPEC_VUPKHUH))]
  "TARGET_ALTIVEC"
  "
{
  rtx zero_vec = gen_reg_rtx (V4SImode);
  rtx perm_mask = gen_reg_rtx (V16QImode);
  rtvec sel = rtvec_alloc (16);
  int i;

  emit_insn (gen_altivec_vspltisw (zero_vec, const0_rtx));

  /* Build the selector 16,17,0,1, 16,17,2,3, ... 16,17,6,7.
     NOTE(review): under the usual vperm byte numbering, 16 and 17 are
     the first two bytes of the second permute input (the zero vector)
     and 0..7 are the first four halfwords of operand 1 -- confirm
     against the AltiVec manual.  */
  for (i = 0; i < 4; i++)
    {
      RTVEC_ELT (sel, 4 * i) = gen_rtx_CONST_INT (QImode, 16);
      RTVEC_ELT (sel, 4 * i + 1) = gen_rtx_CONST_INT (QImode, 17);
      RTVEC_ELT (sel, 4 * i + 2) = gen_rtx_CONST_INT (QImode, 2 * i);
      RTVEC_ELT (sel, 4 * i + 3) = gen_rtx_CONST_INT (QImode, 2 * i + 1);
    }

  emit_insn (gen_vec_initv16qi (perm_mask,
                                gen_rtx_PARALLEL (V16QImode, sel)));
  emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1],
                                 zero_vec, perm_mask));
  DONE;
}")
|
||||
|
||||
;; Unsigned unpack of the low half of a V16QI vector into V8HI.  The
;; expansion permutes the input together with an all-zero vector so that
;; each result halfword is a zero byte followed by one input byte.
(define_expand "vec_unpacku_lo_v16qi"
  [(set (match_operand:V8HI 0 "register_operand" "=v")
        (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
                     UNSPEC_VUPKLUB))]
  "TARGET_ALTIVEC"
  "
{
  rtx zero_vec = gen_reg_rtx (V8HImode);
  rtx perm_mask = gen_reg_rtx (V16QImode);
  rtvec sel = rtvec_alloc (16);
  int i;

  emit_insn (gen_altivec_vspltish (zero_vec, const0_rtx));

  /* Build the selector 16,8, 16,9, ... 16,15.  NOTE(review): under the
     usual vperm byte numbering, index 16 is byte 0 of the second permute
     input (the zero vector) and 8..15 are the last eight bytes of
     operand 1 -- confirm against the AltiVec manual.  */
  for (i = 0; i < 8; i++)
    {
      RTVEC_ELT (sel, 2 * i) = gen_rtx_CONST_INT (QImode, 16);
      RTVEC_ELT (sel, 2 * i + 1) = gen_rtx_CONST_INT (QImode, 8 + i);
    }

  emit_insn (gen_vec_initv16qi (perm_mask,
                                gen_rtx_PARALLEL (V16QImode, sel)));
  emit_insn (gen_vperm_v16qiv8hi (operands[0], operands[1],
                                  zero_vec, perm_mask));
  DONE;
}")
|
||||
|
||||
;; Unsigned unpack of the low half of a V8HI vector into V4SI.  The
;; expansion permutes the input together with an all-zero vector so that
;; each result word is two zero bytes followed by one input halfword.
(define_expand "vec_unpacku_lo_v8hi"
  [(set (match_operand:V4SI 0 "register_operand" "=v")
        (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
                     UNSPEC_VUPKLUH))]
  "TARGET_ALTIVEC"
  "
{
  rtx zero_vec = gen_reg_rtx (V4SImode);
  rtx perm_mask = gen_reg_rtx (V16QImode);
  rtvec sel = rtvec_alloc (16);
  int i;

  emit_insn (gen_altivec_vspltisw (zero_vec, const0_rtx));

  /* Build the selector 16,17,8,9, 16,17,10,11, ... 16,17,14,15.
     NOTE(review): under the usual vperm byte numbering, 16 and 17 are
     the first two bytes of the second permute input (the zero vector)
     and 8..15 are the last four halfwords of operand 1 -- confirm
     against the AltiVec manual.  */
  for (i = 0; i < 4; i++)
    {
      RTVEC_ELT (sel, 4 * i) = gen_rtx_CONST_INT (QImode, 16);
      RTVEC_ELT (sel, 4 * i + 1) = gen_rtx_CONST_INT (QImode, 17);
      RTVEC_ELT (sel, 4 * i + 2) = gen_rtx_CONST_INT (QImode, 8 + 2 * i);
      RTVEC_ELT (sel, 4 * i + 3) = gen_rtx_CONST_INT (QImode, 9 + 2 * i);
    }

  emit_insn (gen_vec_initv16qi (perm_mask,
                                gen_rtx_PARALLEL (V16QImode, sel)));
  emit_insn (gen_vperm_v8hiv4si (operands[0], operands[1],
                                 zero_vec, perm_mask));
  DONE;
}")
|
||||
|
||||
(define_expand "vec_widen_umult_hi_v16qi"
|
||||
[(set (match_operand:V8HI 0 "register_operand" "=v")
|
||||
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
|
||||
(match_operand:V16QI 2 "register_operand" "v")]
|
||||
UNSPEC_VMULWHUB))]
|
||||
"TARGET_ALTIVEC"
|
||||
"
|
||||
{
|
||||
rtx ve = gen_reg_rtx (V8HImode);
|
||||
rtx vo = gen_reg_rtx (V8HImode);
|
||||
|
||||
emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2]));
|
||||
emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2]));
|
||||
emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_expand "vec_widen_umult_lo_v16qi"
|
||||
[(set (match_operand:V8HI 0 "register_operand" "=v")
|
||||
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
|
||||
(match_operand:V16QI 2 "register_operand" "v")]
|
||||
UNSPEC_VMULWLUB))]
|
||||
"TARGET_ALTIVEC"
|
||||
"
|
||||
{
|
||||
rtx ve = gen_reg_rtx (V8HImode);
|
||||
rtx vo = gen_reg_rtx (V8HImode);
|
||||
|
||||
emit_insn (gen_altivec_vmuleub (ve, operands[1], operands[2]));
|
||||
emit_insn (gen_altivec_vmuloub (vo, operands[1], operands[2]));
|
||||
emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_expand "vec_widen_smult_hi_v16qi"
|
||||
[(set (match_operand:V8HI 0 "register_operand" "=v")
|
||||
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
|
||||
(match_operand:V16QI 2 "register_operand" "v")]
|
||||
UNSPEC_VMULWHSB))]
|
||||
"TARGET_ALTIVEC"
|
||||
"
|
||||
{
|
||||
rtx ve = gen_reg_rtx (V8HImode);
|
||||
rtx vo = gen_reg_rtx (V8HImode);
|
||||
|
||||
emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2]));
|
||||
emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2]));
|
||||
emit_insn (gen_altivec_vmrghh (operands[0], ve, vo));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_expand "vec_widen_smult_lo_v16qi"
|
||||
[(set (match_operand:V8HI 0 "register_operand" "=v")
|
||||
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
|
||||
(match_operand:V16QI 2 "register_operand" "v")]
|
||||
UNSPEC_VMULWLSB))]
|
||||
"TARGET_ALTIVEC"
|
||||
"
|
||||
{
|
||||
rtx ve = gen_reg_rtx (V8HImode);
|
||||
rtx vo = gen_reg_rtx (V8HImode);
|
||||
|
||||
emit_insn (gen_altivec_vmulesb (ve, operands[1], operands[2]));
|
||||
emit_insn (gen_altivec_vmulosb (vo, operands[1], operands[2]));
|
||||
emit_insn (gen_altivec_vmrglh (operands[0], ve, vo));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_expand "vec_widen_umult_hi_v8hi"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
|
||||
(match_operand:V8HI 2 "register_operand" "v")]
|
||||
UNSPEC_VMULWHUH))]
|
||||
"TARGET_ALTIVEC"
|
||||
"
|
||||
{
|
||||
rtx ve = gen_reg_rtx (V4SImode);
|
||||
rtx vo = gen_reg_rtx (V4SImode);
|
||||
|
||||
emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2]));
|
||||
emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2]));
|
||||
emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_expand "vec_widen_umult_lo_v8hi"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
|
||||
(match_operand:V8HI 2 "register_operand" "v")]
|
||||
UNSPEC_VMULWLUH))]
|
||||
"TARGET_ALTIVEC"
|
||||
"
|
||||
{
|
||||
rtx ve = gen_reg_rtx (V4SImode);
|
||||
rtx vo = gen_reg_rtx (V4SImode);
|
||||
|
||||
emit_insn (gen_altivec_vmuleuh (ve, operands[1], operands[2]));
|
||||
emit_insn (gen_altivec_vmulouh (vo, operands[1], operands[2]));
|
||||
emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_expand "vec_widen_smult_hi_v8hi"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
|
||||
(match_operand:V8HI 2 "register_operand" "v")]
|
||||
UNSPEC_VMULWHSH))]
|
||||
"TARGET_ALTIVEC"
|
||||
"
|
||||
{
|
||||
rtx ve = gen_reg_rtx (V4SImode);
|
||||
rtx vo = gen_reg_rtx (V4SImode);
|
||||
|
||||
emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2]));
|
||||
emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2]));
|
||||
emit_insn (gen_altivec_vmrghw (operands[0], ve, vo));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_expand "vec_widen_smult_lo_v8hi"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=v")
|
||||
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
|
||||
(match_operand:V8HI 2 "register_operand" "v")]
|
||||
UNSPEC_VMULWLSH))]
|
||||
"TARGET_ALTIVEC"
|
||||
"
|
||||
{
|
||||
rtx ve = gen_reg_rtx (V4SImode);
|
||||
rtx vo = gen_reg_rtx (V4SImode);
|
||||
|
||||
emit_insn (gen_altivec_vmulesh (ve, operands[1], operands[2]));
|
||||
emit_insn (gen_altivec_vmulosh (vo, operands[1], operands[2]));
|
||||
emit_insn (gen_altivec_vmrglw (operands[0], ve, vo));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_expand "vec_pack_mod_v8hi"
|
||||
[(set (match_operand:V16QI 0 "register_operand" "=v")
|
||||
(unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
|
||||
(match_operand:V8HI 2 "register_operand" "v")]
|
||||
UNSPEC_VPKUHUM))]
|
||||
"TARGET_ALTIVEC"
|
||||
"
|
||||
{
|
||||
emit_insn (gen_altivec_vpkuhum (operands[0], operands[1], operands[2]));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_expand "vec_pack_mod_v4si"
|
||||
[(set (match_operand:V8HI 0 "register_operand" "=v")
|
||||
(unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
|
||||
(match_operand:V4SI 2 "register_operand" "v")]
|
||||
UNSPEC_VPKUWUM))]
|
||||
"TARGET_ALTIVEC"
|
||||
"
|
||||
{
|
||||
emit_insn (gen_altivec_vpkuwum (operands[0], operands[1], operands[2]));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_expand "negv4sf2"
|
||||
[(use (match_operand:V4SF 0 "register_operand" ""))
|
||||
(use (match_operand:V4SF 1 "register_operand" ""))]
|
||||
|
@ -693,6 +693,8 @@ static int rs6000_sched_reorder (FILE *, int, rtx *, int *, int);
|
||||
static int rs6000_sched_reorder2 (FILE *, int, rtx *, int *, int);
|
||||
static int rs6000_use_sched_lookahead (void);
|
||||
static tree rs6000_builtin_mask_for_load (void);
|
||||
static tree rs6000_builtin_mul_widen_even (tree);
|
||||
static tree rs6000_builtin_mul_widen_odd (tree);
|
||||
|
||||
static void def_builtin (int, const char *, tree, int);
|
||||
static void rs6000_init_builtins (void);
|
||||
@ -952,6 +954,10 @@ static const char alt_reg_names[][8] =
|
||||
|
||||
#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
|
||||
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
|
||||
#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
|
||||
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN rs6000_builtin_mul_widen_even
|
||||
#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
|
||||
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD rs6000_builtin_mul_widen_odd
|
||||
|
||||
#undef TARGET_INIT_BUILTINS
|
||||
#define TARGET_INIT_BUILTINS rs6000_init_builtins
|
||||
@ -1631,6 +1637,52 @@ rs6000_builtin_mask_for_load (void)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Implement targetm.vectorize.builtin_mul_widen_even. */
|
||||
static tree
|
||||
rs6000_builtin_mul_widen_even (tree type)
|
||||
{
|
||||
if (!TARGET_ALTIVEC)
|
||||
return NULL_TREE;
|
||||
|
||||
switch (TYPE_MODE (type))
|
||||
{
|
||||
case V8HImode:
|
||||
return TYPE_UNSIGNED (type) ?
|
||||
rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULEUH] :
|
||||
rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULESH];
|
||||
|
||||
case V16QImode:
|
||||
return TYPE_UNSIGNED (type) ?
|
||||
rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULEUB] :
|
||||
rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULESB];
|
||||
default:
|
||||
return NULL_TREE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Implement targetm.vectorize.builtin_mul_widen_odd. */
|
||||
static tree
|
||||
rs6000_builtin_mul_widen_odd (tree type)
|
||||
{
|
||||
if (!TARGET_ALTIVEC)
|
||||
return NULL_TREE;
|
||||
|
||||
switch (TYPE_MODE (type))
|
||||
{
|
||||
case V8HImode:
|
||||
return TYPE_UNSIGNED (type) ?
|
||||
rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOUH] :
|
||||
rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOSH];
|
||||
|
||||
case V16QImode:
|
||||
return TYPE_UNSIGNED (type) ?
|
||||
rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOUB] :
|
||||
rs6000_builtin_decls[ALTIVEC_BUILTIN_VMULOSB];
|
||||
default:
|
||||
return NULL_TREE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle generic options of the form -mfoo=yes/no.
|
||||
NAME is the option name.
|
||||
VALUE is the option value.
|
||||
|
@ -1928,6 +1928,14 @@ This macro returns the attributes on the type @var{type}.
|
||||
@tindex OMP_CONTINUE
|
||||
@tindex OMP_ATOMIC
|
||||
@tindex OMP_CLAUSE
|
||||
@tindex VEC_LSHIFT_EXPR
|
||||
@tindex VEC_RSHIFT_EXPR
|
||||
@tindex VEC_WIDEN_MULT_HI_EXPR
|
||||
@tindex VEC_WIDEN_MULT_LO_EXPR
|
||||
@tindex VEC_UNPACK_HI_EXPR
|
||||
@tindex VEC_UNPACK_LO_EXPR
|
||||
@tindex VEC_PACK_MOD_EXPR
|
||||
@tindex VEC_PACK_SAT_EXPR
|
||||
|
||||
The internal representation for expressions is for the most part quite
|
||||
straightforward. However, there are a few facts that one must bear in
|
||||
@ -2735,4 +2743,44 @@ same clause @code{C} need to be represented as multiple @code{C} clauses
|
||||
chained together. This facilitates adding new clauses during
|
||||
compilation.
|
||||
|
||||
@item VEC_LSHIFT_EXPR
|
||||
@item VEC_RSHIFT_EXPR
|
||||
These nodes represent whole vector left and right shifts, respectively.
|
||||
The first operand is the vector to shift; it will always be of vector type.
|
||||
The second operand is an expression for the number of bits by which to
|
||||
shift. Note that the result is undefined if the second operand is larger
|
||||
than or equal to the first operand's type size.
|
||||
|
||||
@item VEC_WIDEN_MULT_HI_EXPR
|
||||
@item VEC_WIDEN_MULT_LO_EXPR
|
||||
These nodes represent widening vector multiplication of the high and low
|
||||
parts of the two input vectors, respectively. Their operands are vectors
|
||||
that contain the same number of elements (@code{N}) of the same integral type.
|
||||
The result is a vector that contains half as many elements, of an integral type
|
||||
whose size is twice as wide. In the case of @code{VEC_WIDEN_MULT_HI_EXPR} the
|
||||
high @code{N/2} elements of the two vectors are multiplied to produce the
|
||||
vector of @code{N/2} products. In the case of @code{VEC_WIDEN_MULT_LO_EXPR} the
|
||||
low @code{N/2} elements of the two vectors are multiplied to produce the
|
||||
vector of @code{N/2} products.
|
||||
|
||||
@item VEC_UNPACK_HI_EXPR
|
||||
@item VEC_UNPACK_LO_EXPR
|
||||
These nodes represent unpacking of the high and low parts of the input vector,
|
||||
respectively. The single operand is a vector that contains @code{N} elements
|
||||
of the same integral type. The result is a vector that contains half as many
|
||||
elements, of an integral type whose size is twice as wide. In the case of
|
||||
@code{VEC_UNPACK_HI_EXPR} the high @code{N/2} elements of the vector are
|
||||
extracted and widened (promoted). In the case of @code{VEC_UNPACK_LO_EXPR} the
|
||||
low @code{N/2} elements of the vector are extracted and widened (promoted).
|
||||
|
||||
@item VEC_PACK_MOD_EXPR
|
||||
@item VEC_PACK_SAT_EXPR
|
||||
These nodes represent packing of elements of the two input vectors into the
|
||||
output vector, using modulo or saturating arithmetic, respectively.
|
||||
Their operands are vectors that contain the same number of elements
|
||||
of the same integral type. The result is a vector that contains twice as many
|
||||
elements, of an integral type whose size is half as wide. In both cases
|
||||
the elements of the two vectors are demoted and merged (concatenated) to form
|
||||
the output vector.
|
||||
|
||||
@end table
|
||||
|
@ -3495,6 +3495,36 @@ Operand 2 is an integer shift amount in bits.
|
||||
Operand 0 is where the resulting shifted vector is stored.
|
||||
The output and input vectors should have the same modes.
|
||||
|
||||
@cindex @code{vec_pack_mod_@var{m}} instruction pattern
|
||||
@cindex @code{vec_pack_ssat_@var{m}} instruction pattern
|
||||
@cindex @code{vec_pack_usat_@var{m}} instruction pattern
|
||||
@item @samp{vec_pack_mod_@var{m}}, @samp{vec_pack_ssat_@var{m}}, @samp{vec_pack_usat_@var{m}}
|
||||
Narrow (demote) and merge the elements of two vectors.
|
||||
Operands 1 and 2 are vectors of the same mode.
|
||||
Operand 0 is the resulting vector in which the elements of the two input
|
||||
vectors are concatenated after narrowing them down using modulo arithmetic or
|
||||
signed/unsigned saturating arithmetic.
|
||||
|
||||
@cindex @code{vec_unpacks_hi_@var{m}} instruction pattern
|
||||
@cindex @code{vec_unpacks_lo_@var{m}} instruction pattern
|
||||
@cindex @code{vec_unpacku_hi_@var{m}} instruction pattern
|
||||
@cindex @code{vec_unpacku_lo_@var{m}} instruction pattern
|
||||
@item @samp{vec_unpacks_hi_@var{m}}, @samp{vec_unpacks_lo_@var{m}}, @samp{vec_unpacku_hi_@var{m}}, @samp{vec_unpacku_lo_@var{m}}
|
||||
Extract and widen (promote) the high/low part of a vector of signed/unsigned
|
||||
elements. The input vector (operand 1) has N signed/unsigned elements of size S.
|
||||
Using sign/zero extension widen (promote) the high/low elements of the vector,
|
||||
and place the resulting N/2 values of size 2*S in the output vector (operand 0).
|
||||
|
||||
@cindex @code{vec_widen_umult_hi_@var{m}} instruction pattern
|
||||
@cindex @code{vec_widen_umult_lo_@var{m}} instruction pattern
|
||||
@cindex @code{vec_widen_smult_hi_@var{m}} instruction pattern
|
||||
@cindex @code{vec_widen_smult_lo_@var{m}} instruction pattern
|
||||
@item @samp{vec_widen_umult_hi_@var{m}}, @samp{vec_widen_umult_lo_@var{m}}, @samp{vec_widen_smult_hi_@var{m}}, @samp{vec_widen_smult_lo_@var{m}}
|
||||
Signed/Unsigned widening multiplication.
|
||||
The two inputs (operands 1 and 2) are vectors with N
|
||||
signed/unsigned elements of size S. Multiply the high/low elements of the two
|
||||
vectors, and put the N/2 products of size 2*S in the output vector (operand 0).
|
||||
|
||||
@cindex @code{mulhisi3} instruction pattern
|
||||
@item @samp{mulhisi3}
|
||||
Multiply operands 1 and 2, which have mode @code{HImode}, and store
|
||||
|
@ -5284,6 +5284,28 @@ the argument @var{OFF} to @code{REALIGN_LOAD}, in which case the low
|
||||
log2(@var{VS})-1 bits of @var{addr} will be considered.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN (tree @var{x})
|
||||
This hook should return the DECL of a function @var{f} that implements
|
||||
widening multiplication of the even elements of two input vectors of type @var{x}.
|
||||
|
||||
If this hook is defined, the autovectorizer will use it along with the
|
||||
@code{TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD} target hook when vectorizing
|
||||
widening multiplication in cases where the order of the results does not have to be
|
||||
preserved (e.g. used only by a reduction computation). Otherwise, the
|
||||
@code{widen_mult_hi/lo} idioms will be used.
|
||||
@end deftypefn
|
||||
|
||||
@deftypefn {Target Hook} tree TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD (tree @var{x})
|
||||
This hook should return the DECL of a function @var{f} that implements
|
||||
widening multiplication of the odd elements of two input vectors of type @var{x}.
|
||||
|
||||
If this hook is defined, the autovectorizer will use it along with the
|
||||
@code{TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN} target hook when vectorizing
|
||||
widening multiplication in cases where the order of the results does not have to be
|
||||
preserved (e.g. used only by a reduction computation). Otherwise, the
|
||||
@code{widen_mult_hi/lo} idioms will be used.
|
||||
@end deftypefn
|
||||
|
||||
@node Anchored Addresses
|
||||
@section Anchored Addresses
|
||||
@cindex anchored addresses
|
||||
|
31
gcc/expr.c
31
gcc/expr.c
@ -8757,6 +8757,37 @@ expand_expr_real_1 (tree exp, rtx target, enum machine_mode tmode,
|
||||
return target;
|
||||
}
|
||||
|
||||
case VEC_UNPACK_HI_EXPR:
|
||||
case VEC_UNPACK_LO_EXPR:
|
||||
{
|
||||
op0 = expand_expr (TREE_OPERAND (exp, 0), NULL_RTX, VOIDmode, 0);
|
||||
this_optab = optab_for_tree_code (code, type);
|
||||
temp = expand_widen_pattern_expr (exp, op0, NULL_RTX, NULL_RTX,
|
||||
target, unsignedp);
|
||||
gcc_assert (temp);
|
||||
return temp;
|
||||
}
|
||||
|
||||
case VEC_WIDEN_MULT_HI_EXPR:
|
||||
case VEC_WIDEN_MULT_LO_EXPR:
|
||||
{
|
||||
tree oprnd0 = TREE_OPERAND (exp, 0);
|
||||
tree oprnd1 = TREE_OPERAND (exp, 1);
|
||||
|
||||
expand_operands (oprnd0, oprnd1, NULL_RTX, &op0, &op1, 0);
|
||||
target = expand_widen_pattern_expr (exp, op0, op1, NULL_RTX,
|
||||
target, unsignedp);
|
||||
gcc_assert (target);
|
||||
return target;
|
||||
}
|
||||
|
||||
case VEC_PACK_MOD_EXPR:
|
||||
case VEC_PACK_SAT_EXPR:
|
||||
{
|
||||
mode = TYPE_MODE (TREE_TYPE (TREE_OPERAND (exp, 0)));
|
||||
goto binop;
|
||||
}
|
||||
|
||||
default:
|
||||
return lang_hooks.expand_expr (exp, original_target, tmode,
|
||||
modifier, alt_rtl);
|
||||
|
@ -214,7 +214,17 @@ static const char * const optabs[] =
|
||||
"reduc_smin_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_smin_$a$)",
|
||||
"reduc_umin_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_umin_$a$)",
|
||||
"reduc_splus_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_splus_$a$)" ,
|
||||
"reduc_uplus_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_uplus_$a$)"
|
||||
"reduc_uplus_optab->handlers[$A].insn_code = CODE_FOR_$(reduc_uplus_$a$)",
|
||||
"vec_widen_umult_hi_optab->handlers[$A].insn_code = CODE_FOR_$(vec_widen_umult_hi_$a$)",
|
||||
"vec_widen_umult_lo_optab->handlers[$A].insn_code = CODE_FOR_$(vec_widen_umult_lo_$a$)",
|
||||
"vec_widen_smult_hi_optab->handlers[$A].insn_code = CODE_FOR_$(vec_widen_smult_hi_$a$)",
|
||||
"vec_widen_smult_lo_optab->handlers[$A].insn_code = CODE_FOR_$(vec_widen_smult_lo_$a$)",
|
||||
"vec_unpacks_hi_optab->handlers[$A].insn_code = CODE_FOR_$(vec_unpacks_hi_$a$)",
|
||||
"vec_unpacks_lo_optab->handlers[$A].insn_code = CODE_FOR_$(vec_unpacks_lo_$a$)",
|
||||
"vec_unpacku_hi_optab->handlers[$A].insn_code = CODE_FOR_$(vec_unpacku_hi_$a$)",
|
||||
"vec_unpacku_lo_optab->handlers[$A].insn_code = CODE_FOR_$(vec_unpacku_lo_$a$)",
|
||||
"vec_pack_mod_optab->handlers[$A].insn_code = CODE_FOR_$(vec_pack_mod_$a$)",
|
||||
"vec_pack_ssat_optab->handlers[$A].insn_code = CODE_FOR_$(vec_pack_ssat_$a$)", "vec_pack_usat_optab->handlers[$A].insn_code = CODE_FOR_$(vec_pack_usat_$a$)"
|
||||
};
|
||||
|
||||
static void gen_insn (rtx);
|
||||
|
51
gcc/optabs.c
51
gcc/optabs.c
@ -315,6 +315,28 @@ optab_for_tree_code (enum tree_code code, tree type)
|
||||
case VEC_RSHIFT_EXPR:
|
||||
return vec_shr_optab;
|
||||
|
||||
case VEC_WIDEN_MULT_HI_EXPR:
|
||||
return TYPE_UNSIGNED (type) ?
|
||||
vec_widen_umult_hi_optab : vec_widen_smult_hi_optab;
|
||||
|
||||
case VEC_WIDEN_MULT_LO_EXPR:
|
||||
return TYPE_UNSIGNED (type) ?
|
||||
vec_widen_umult_lo_optab : vec_widen_smult_lo_optab;
|
||||
|
||||
case VEC_UNPACK_HI_EXPR:
|
||||
return TYPE_UNSIGNED (type) ?
|
||||
vec_unpacku_hi_optab : vec_unpacks_hi_optab;
|
||||
|
||||
case VEC_UNPACK_LO_EXPR:
|
||||
return TYPE_UNSIGNED (type) ?
|
||||
vec_unpacku_lo_optab : vec_unpacks_lo_optab;
|
||||
|
||||
case VEC_PACK_MOD_EXPR:
|
||||
return vec_pack_mod_optab;
|
||||
|
||||
case VEC_PACK_SAT_EXPR:
|
||||
return TYPE_UNSIGNED (type) ? vec_pack_usat_optab : vec_pack_ssat_optab;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
@ -1276,6 +1298,7 @@ expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1,
|
||||
int icode = (int) binoptab->handlers[(int) mode].insn_code;
|
||||
enum machine_mode mode0 = insn_data[icode].operand[1].mode;
|
||||
enum machine_mode mode1 = insn_data[icode].operand[2].mode;
|
||||
enum machine_mode tmp_mode;
|
||||
rtx pat;
|
||||
rtx xop0 = op0, xop1 = op1;
|
||||
|
||||
@ -1329,8 +1352,21 @@ expand_binop (enum machine_mode mode, optab binoptab, rtx op0, rtx op1,
|
||||
&& mode1 != VOIDmode)
|
||||
xop1 = copy_to_mode_reg (mode1, xop1);
|
||||
|
||||
if (!insn_data[icode].operand[0].predicate (temp, mode))
|
||||
temp = gen_reg_rtx (mode);
|
||||
if (binoptab == vec_pack_mod_optab
|
||||
|| binoptab == vec_pack_usat_optab
|
||||
|| binoptab == vec_pack_ssat_optab)
|
||||
{
|
||||
/* The mode of the result is different than the mode of the
|
||||
arguments. */
|
||||
tmp_mode = insn_data[icode].operand[0].mode;
|
||||
if (GET_MODE_NUNITS (tmp_mode) != 2 * GET_MODE_NUNITS (mode))
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
tmp_mode = mode;
|
||||
|
||||
if (!insn_data[icode].operand[0].predicate (temp, tmp_mode))
|
||||
temp = gen_reg_rtx (tmp_mode);
|
||||
|
||||
pat = GEN_FCN (icode) (temp, xop0, xop1);
|
||||
if (pat)
|
||||
@ -5354,6 +5390,17 @@ init_optabs (void)
|
||||
vec_shr_optab = init_optab (UNKNOWN);
|
||||
vec_realign_load_optab = init_optab (UNKNOWN);
|
||||
movmisalign_optab = init_optab (UNKNOWN);
|
||||
vec_widen_umult_hi_optab = init_optab (UNKNOWN);
|
||||
vec_widen_umult_lo_optab = init_optab (UNKNOWN);
|
||||
vec_widen_smult_hi_optab = init_optab (UNKNOWN);
|
||||
vec_widen_smult_lo_optab = init_optab (UNKNOWN);
|
||||
vec_unpacks_hi_optab = init_optab (UNKNOWN);
|
||||
vec_unpacks_lo_optab = init_optab (UNKNOWN);
|
||||
vec_unpacku_hi_optab = init_optab (UNKNOWN);
|
||||
vec_unpacku_lo_optab = init_optab (UNKNOWN);
|
||||
vec_pack_mod_optab = init_optab (UNKNOWN);
|
||||
vec_pack_usat_optab = init_optab (UNKNOWN);
|
||||
vec_pack_ssat_optab = init_optab (UNKNOWN);
|
||||
|
||||
powi_optab = init_optab (UNKNOWN);
|
||||
|
||||
|
29
gcc/optabs.h
29
gcc/optabs.h
@ -260,6 +260,22 @@ enum optab_index
|
||||
OTI_vec_shr,
|
||||
/* Extract specified elements from vectors, for vector load. */
|
||||
OTI_vec_realign_load,
|
||||
/* Widening multiplication.
|
||||
The high/low part of the resulting vector of products is returned. */
|
||||
OTI_vec_widen_umult_hi,
|
||||
OTI_vec_widen_umult_lo,
|
||||
OTI_vec_widen_smult_hi,
|
||||
OTI_vec_widen_smult_lo,
|
||||
/* Extract and widen the high/low part of a vector of signed/unsigned
|
||||
elements. */
|
||||
OTI_vec_unpacks_hi,
|
||||
OTI_vec_unpacks_lo,
|
||||
OTI_vec_unpacku_hi,
|
||||
OTI_vec_unpacku_lo,
|
||||
/* Narrow (demote) and merge the elements of two vectors. */
|
||||
OTI_vec_pack_mod,
|
||||
OTI_vec_pack_usat,
|
||||
OTI_vec_pack_ssat,
|
||||
|
||||
/* Perform a raise to the power of integer. */
|
||||
OTI_powi,
|
||||
@ -385,7 +401,18 @@ extern GTY(()) optab optab_table[OTI_MAX];
|
||||
#define vec_shl_optab (optab_table[OTI_vec_shl])
|
||||
#define vec_shr_optab (optab_table[OTI_vec_shr])
|
||||
#define vec_realign_load_optab (optab_table[OTI_vec_realign_load])
|
||||
|
||||
#define vec_widen_umult_hi_optab (optab_table[OTI_vec_widen_umult_hi])
|
||||
#define vec_widen_umult_lo_optab (optab_table[OTI_vec_widen_umult_lo])
|
||||
#define vec_widen_smult_hi_optab (optab_table[OTI_vec_widen_smult_hi])
|
||||
#define vec_widen_smult_lo_optab (optab_table[OTI_vec_widen_smult_lo])
|
||||
#define vec_unpacks_hi_optab (optab_table[OTI_vec_unpacks_hi])
|
||||
#define vec_unpacku_hi_optab (optab_table[OTI_vec_unpacku_hi])
|
||||
#define vec_unpacks_lo_optab (optab_table[OTI_vec_unpacks_lo])
|
||||
#define vec_unpacku_lo_optab (optab_table[OTI_vec_unpacku_lo])
|
||||
#define vec_pack_mod_optab (optab_table[OTI_vec_pack_mod])
|
||||
#define vec_pack_ssat_optab (optab_table[OTI_vec_pack_ssat])
|
||||
#define vec_pack_usat_optab (optab_table[OTI_vec_pack_usat])
|
||||
|
||||
#define powi_optab (optab_table[OTI_powi])
|
||||
|
||||
/* Conversion optabs have their own table and indexes. */
|
||||
|
@ -332,9 +332,13 @@ Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
TARGET_SCHED_SET_SCHED_FLAGS}
|
||||
|
||||
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD 0
|
||||
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN 0
|
||||
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD 0
|
||||
|
||||
#define TARGET_VECTORIZE \
|
||||
{TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD}
|
||||
{TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD, \
|
||||
TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN, \
|
||||
TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD}
|
||||
|
||||
#define TARGET_DEFAULT_TARGET_FLAGS 0
|
||||
|
||||
|
@ -369,6 +369,13 @@ struct gcc_target
|
||||
by the vectorizer, and return the decl of the target builtin
|
||||
function. */
|
||||
tree (* builtin_mask_for_load) (void);
|
||||
|
||||
/* Target builtin that implements vector widening multiplication.
|
||||
builtin_mul_widen_even computes the element-by-element products
|
||||
for the even elements, and builtin_mul_widen_odd computes the
|
||||
element-by-element products for the odd elements. */
|
||||
tree (* builtin_mul_widen_even) (tree);
|
||||
tree (* builtin_mul_widen_odd) (tree);
|
||||
} vectorize;
|
||||
|
||||
/* The initial value of target_flags. */
|
||||
|
@ -1,3 +1,67 @@
|
||||
2006-11-08 Dorit Nuzman <dorit@il.ibm.com>
|
||||
|
||||
* gcc.dg/vect/vect-1.c: Loop with multiple types removed (appears in
|
||||
vect-9.c).
|
||||
* gcc.dg/vect/vect-106.c: Removed (duplicate of vect-9.c).
|
||||
* gcc.dg/vect/vect-9.c: Now vectorizable.
|
||||
* gcc.dg/vect/vect-reduc-dot-s16a.c: Now vectorizable also on targets
|
||||
that support vect_widen_mult.
|
||||
* gcc.dg/vect/vect-reduc-dot-u16.c: Removed (split into two new tests).
|
||||
* gcc.dg/vect/vect-reduc-dot-u16a.c: New test (split from
|
||||
vect-reduc-dot-u16.c).
|
||||
* gcc.dg/vect/vect-reduc-dot-u16b.c: New test (split from
|
||||
vect-reduc-dot-u16.c).
|
||||
* gcc.dg/vect/vect-reduc-dot-s8.c: Removed (split into three new tests).
|
||||
* gcc.dg/vect/vect-reduc-dot-s8a.c: New test (split from
|
||||
vect-reduc-dot-s8.c).
|
||||
* gcc.dg/vect/vect-reduc-dot-s8b.c: New test (split from
|
||||
vect-reduc-dot-s8.c).
|
||||
* gcc.dg/vect/vect-reduc-dot-s8c.c: New test (split from
|
||||
vect-reduc-dot-s8.c).
|
||||
* gcc.dg/vect/vect-reduc-dot-u8.c: Removed (split into two new tests).
|
||||
* gcc.dg/vect/vect-reduc-dot-u8a.c: New test (split from
|
||||
vect-reduc-dot-u8.c).
|
||||
* gcc.dg/vect/vect-reduc-dot-u8b.c: New test (split from
|
||||
vect-reduc-dot-u8.c).
|
||||
* gcc.dg/vect/vect-widen-mult-sum.c: New test.
|
||||
* gcc.dg/vect/vect-multitypes-9.c: New test.
|
||||
* gcc.dg/vect/vect-multitypes-10.c: New test.
|
||||
* gcc.dg/vect/vect-widen-mult-s16.c: New test.
|
||||
* gcc.dg/vect/vect-widen-mult-u16.c: New test.
|
||||
* gcc.dg/vect/vect-widen-mult-u8.c: New test.
|
||||
* gcc.dg/vect/vect-widen-mult-s8.c: New test.
|
||||
* gcc.dg/vect/wrapv-vect-reduc-dot-s8.c: Removed.
|
||||
* gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c: New reduced version of
|
||||
wrapv-vect-reduc-dot-s8.c.
|
||||
* lib/target-supports.exp (check_effective_target_vect_unpack): New.
|
||||
(check_effective_target_vect_widen_sum_hi_to_si): Now also includes
|
||||
targets that support vec_unpack.
|
||||
(check_effective_target_vect_widen_sum_qi_to_hi): Likewise.
|
||||
(check_effective_target_vect_widen_mult_qi_to_hi): New.
|
||||
(check_effective_target_vect_widen_mult_hi_to_si): New.
|
||||
(check_effective_target_vect_widen_sum): Removed.
|
||||
|
||||
2006-11-08 Dorit Nuzman <dorit@il.ibm.com>
|
||||
|
||||
* gcc.dg/vect/vect-multitypes-8.c: New test.
|
||||
* lib/target-supports.exp (check_effective_target_vect_pack_mod): New.
|
||||
|
||||
2006-11-08 Dorit Nuzman <dorit@il.ibm.com>
|
||||
|
||||
* gcc.dg/vect/vect-multitypes-7.c: New test.
|
||||
|
||||
2006-11-08 Dorit Nuzman <dorit@il.ibm.com>
|
||||
|
||||
* gcc.dg/vect/vect-multitypes-4.c: New test.
|
||||
* gcc.dg/vect/vect-multitypes-5.c: New test.
|
||||
* gcc.dg/vect/vect-multitypes-6.c: New test.
|
||||
|
||||
2006-11-08 Dorit Nuzman <dorit@il.ibm.com>
|
||||
|
||||
* gcc.dg/vect/vect-multitypes-1.c: New test.
|
||||
* gcc.dg/vect/vect-multitypes-2.c: New test.
|
||||
* gcc.dg/vect/vect-multitypes-3.c: New test.
|
||||
|
||||
2006-11-07 Eric Christopher <echristo@apple.com>
|
||||
|
||||
* gcc.target/i386/builtin-bswap-1.c: Rewrite for 64-bit.
|
||||
|
@ -19,9 +19,6 @@ foo (int n)
|
||||
int ia[N];
|
||||
int ib[N];
|
||||
int ic[N];
|
||||
short sa[N];
|
||||
short sb[N];
|
||||
short sc[N];
|
||||
int i,j;
|
||||
int diff = 0;
|
||||
char cb[N];
|
||||
@ -80,16 +77,6 @@ foo (int n)
|
||||
fbar (a);
|
||||
fbar (d);
|
||||
|
||||
|
||||
/* Not vectorizable yet (two types with different nunits in vector). */
|
||||
for (i = 0; i < N; i++){
|
||||
ia[i] = ib[i] + ic[i];
|
||||
sa[i] = sb[i] + sc[i];
|
||||
}
|
||||
ibar (ia);
|
||||
sbar (sa);
|
||||
|
||||
|
||||
/* Not vectorizable yet (too conservative dependence test). */
|
||||
for (i = 0; i < N; i++){
|
||||
a[i] = b[i] + c[i];
|
||||
|
@ -1,40 +0,0 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 16
|
||||
|
||||
int
|
||||
main1 (void)
|
||||
{
|
||||
int i;
|
||||
short sb[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
int ia[N];
|
||||
|
||||
/* Type cast. */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
ia[i] = (int) sb[i];
|
||||
}
|
||||
|
||||
|
||||
/* Check results. */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (ia[i] != (int) sb[i])
|
||||
abort();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
return main1 ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
@ -3,42 +3,75 @@
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 16
|
||||
#define N 32
|
||||
|
||||
int
|
||||
main1 ()
|
||||
short sa[N];
|
||||
short sc[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
|
||||
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
|
||||
short sb[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
|
||||
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
|
||||
int ia[N];
|
||||
int ic[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,
|
||||
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
||||
int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,
|
||||
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
||||
|
||||
int main1 (int n)
|
||||
{
|
||||
int i;
|
||||
short sc[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
short sb[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
short sa[N];
|
||||
int ic[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
int ia[N];
|
||||
|
||||
/* Two types with different nunits in vector. */
|
||||
for (i = 0; i < N; i++)
|
||||
/* Multiple types with different sizes, used in independent
|
||||
computations. Vectorizable. */
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
ia[i] = ib[i] + ic[i];
|
||||
sa[i+2] = sb[i] + sc[i];
|
||||
ia[i+1] = ib[i] + ic[i];
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
if (sa[i+2] != sb[i] + sc[i] || ia[i+1] != ib[i] + ic[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main2 (int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* Multiple types with different sizes, used in independent
|
||||
computations. Vectorizable. */
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
ia[i+1] = ib[i] + ic[i];
|
||||
sa[i] = sb[i] + sc[i];
|
||||
}
|
||||
|
||||
/* Check results. */
|
||||
for (i = 0; i < N; i++)
|
||||
/* check results: */
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
if (ia[i] != ib[i] + ic[i] || sa[i] != sb[i] + sc[i])
|
||||
abort();
|
||||
if (sa[i] != sb[i] + sc[i] || ia[i+1] != ib[i] + ic[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
return main1 ();
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 1 "vect" } } */
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 (N-2);
|
||||
main2 (N-1);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 0 loops" 2 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "not vectorized: unsupported unaligned store" 2 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
|
@ -11,7 +11,7 @@ int main1 ()
|
||||
short sb[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
int ia[N];
|
||||
|
||||
/* Not vetorizable yet (type cast). */
|
||||
/* Requires type promotion (vector unpacking) support. */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
ia[i] = (int) sb[i];
|
||||
@ -34,5 +34,5 @@ int main (void)
|
||||
return main1 ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_unpack } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
@ -38,7 +38,7 @@ int main (void)
|
||||
}
|
||||
|
||||
/* The store is unaligned, the load is aligned. For targets that support unaligned
|
||||
loads, peel to align the store and generated unaligned access for the loads.
|
||||
loads, peel to align the store and generate an unaligned access for the load.
|
||||
For targets that don't support unaligned loads, version for the store. */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
|
87
gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c
Normal file
87
gcc/testsuite/gcc.dg/vect/vect-multitypes-1.c
Normal file
@ -0,0 +1,87 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 32
|
||||
|
||||
short sa[N];
|
||||
short sb[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
|
||||
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
|
||||
int ia[N];
|
||||
int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,
|
||||
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
||||
|
||||
/* Current peeling-for-alignment scheme will consider the 'sa[i+7]'
|
||||
access for peeling, and therefore will examine the option of
|
||||
using a peeling factor = VF-7%VF. This will result in a peeling factor 1,
|
||||
which will also align the access to 'ia[i+3]', and the loop could be
|
||||
vectorized on all targets that support unaligned loads.
|
||||
*/
|
||||
|
||||
int main1 (int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* Multiple types with different sizes, used in independent
|
||||
computations. Vectorizable. */
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
sa[i+7] = sb[i];
|
||||
ia[i+3] = ib[i];
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
if (sa[i+7] != sb[i] || ia[i+3] != ib[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Current peeling-for-alignment scheme will consider the 'ia[i+3]'
|
||||
access for peeling, and therefore will examine the option of
|
||||
using a peeling factor = VF-3%VF. This will result in a peeling factor
|
||||
5 if VF=8, or 1 if VF=4,2. In either case, this will also align the access
|
||||
to 'sa[i+3]', and the loop could be vectorized on targets that support
|
||||
unaligned loads. */
|
||||
|
||||
int main2 (int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* Multiple types with different sizes, used in independent
|
||||
computations. Vectorizable. */
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
ia[i+3] = ib[i];
|
||||
sa[i+3] = sb[i];
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
if (sa[i+3] != sb[i] || ia[i+3] != ib[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 (N-7);
|
||||
main2 (N-3);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 4 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
67
gcc/testsuite/gcc.dg/vect/vect-multitypes-10.c
Normal file
67
gcc/testsuite/gcc.dg/vect/vect-multitypes-10.c
Normal file
@ -0,0 +1,67 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
|
||||
unsigned char uX[N] __attribute__ ((__aligned__(16)));
|
||||
unsigned short uY[N] __attribute__ ((__aligned__(16)));
|
||||
unsigned int uresult[N];
|
||||
signed char X[N] __attribute__ ((__aligned__(16)));
|
||||
signed short Y[N] __attribute__ ((__aligned__(16)));
|
||||
int result[N];
|
||||
|
||||
/* Unsigned type promotion (hi->si) */
|
||||
int
|
||||
foo1(int len) {
|
||||
int i;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
uX[i] = 5;
|
||||
uresult[i] = (unsigned int)uY[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* Signed type promotion (hi->si) */
|
||||
int
|
||||
foo2(int len) {
|
||||
int i;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
uX[i] = 5;
|
||||
result[i] = (int)Y[i];
|
||||
}
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
X[i] = 16-i;
|
||||
uX[i] = 16-i;
|
||||
}
|
||||
|
||||
foo1 (N);
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
if (uresult[i] != (unsigned short)uY[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
foo2 (N);
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
if (result[i] != (short)Y[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_unpack } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
48
gcc/testsuite/gcc.dg/vect/vect-multitypes-2.c
Normal file
48
gcc/testsuite/gcc.dg/vect/vect-multitypes-2.c
Normal file
@ -0,0 +1,48 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 32
|
||||
|
||||
int main1 ()
|
||||
{
|
||||
int i;
|
||||
int ia[N];
|
||||
int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
short sa[N];
|
||||
short sb[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
char ca[N];
|
||||
char cb[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
|
||||
/* Multiple types with different sizes, used in independent
|
||||
computations. Vectorizable. All accesses aligned. */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
ia[i] = ib[i];
|
||||
sa[i] = sb[i];
|
||||
ca[i] = cb[i];
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (ia[i] != ib[i]
|
||||
|| sa[i] != sb[i]
|
||||
|| ca[i] != cb[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
return main1 ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
57
gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c
Normal file
57
gcc/testsuite/gcc.dg/vect/vect-multitypes-3.c
Normal file
@ -0,0 +1,57 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 32
|
||||
|
||||
int ib[N] __attribute__ ((__aligned__(16))) =
|
||||
{0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
short sb[N] __attribute__ ((__aligned__(16))) =
|
||||
{0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
char cb[N] __attribute__ ((__aligned__(16))) =
|
||||
{0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
|
||||
int main1 (int n, int * __restrict__ pib,
|
||||
short * __restrict__ psb,
|
||||
char * __restrict__ pcb)
|
||||
{
|
||||
int i;
|
||||
int ia[N];
|
||||
short sa[N];
|
||||
char ca[N];
|
||||
|
||||
/* Multiple types with different sizes, used in independent
|
||||
computations. Vectorizable. The loads are misaligned. */
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
ia[i] = pib[i];
|
||||
sa[i] = psb[i];
|
||||
ca[i] = pcb[i];
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
if (ia[i] != pib[i]
|
||||
|| sa[i] != psb[i]
|
||||
|| ca[i] != pcb[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 (N, ib, sb, cb);
|
||||
main1 (N-3, ib, sb, &cb[2]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 3 "vect" {xfail vect_no_align } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
91
gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c
Normal file
91
gcc/testsuite/gcc.dg/vect/vect-multitypes-4.c
Normal file
@ -0,0 +1,91 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 32
|
||||
|
||||
unsigned short sa[N];
|
||||
unsigned short sc[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
|
||||
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
|
||||
unsigned short sb[N] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
|
||||
16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
|
||||
unsigned int ia[N];
|
||||
unsigned int ic[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,
|
||||
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
||||
unsigned int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,
|
||||
0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
|
||||
|
||||
/* Current peeling-for-alignment scheme will consider the 'sa[i+7]'
|
||||
access for peeling, and therefore will examine the option of
|
||||
using a peeling factor = VF-7%VF. This will result in a peeling factor 1,
|
||||
which will also align the access to 'ia[i+3]', and the loop could be
|
||||
vectorized on all targets that support unaligned loads.
|
||||
*/
|
||||
|
||||
int main1 (int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* Multiple types with different sizes, used in independent
|
||||
computations. Vectorizable. */
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
sa[i+7] = sb[i] + sc[i];
|
||||
ia[i+3] = ib[i] + ic[i];
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
if (sa[i+7] != sb[i] + sc[i] || ia[i+3] != ib[i] + ic[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Current peeling-for-alignment scheme will consider the 'ia[i+3]'
|
||||
access for peeling, and therefore will examine the option of
|
||||
using a peeling factor = VF-3%VF. This will result in a peeling factor
|
||||
5 if VF=8, or 1 if VF=4,2. In either case, this will also align the access
|
||||
to 'sa[i+3]', and the loop could be vectorized on targets that support
|
||||
unaligned loads. */
|
||||
|
||||
int main2 (int n)
|
||||
{
|
||||
int i;
|
||||
|
||||
/* Multiple types with different sizes, used in independent
|
||||
computations. Vectorizable. */
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
ia[i+3] = ib[i] + ic[i];
|
||||
sa[i+3] = sb[i] + sc[i];
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
if (sa[i+3] != sb[i] + sc[i] || ia[i+3] != ib[i] + ic[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 (N-7);
|
||||
main2 (N-3);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Alignment of access forced using peeling" 2 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 8 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
51
gcc/testsuite/gcc.dg/vect/vect-multitypes-5.c
Normal file
51
gcc/testsuite/gcc.dg/vect/vect-multitypes-5.c
Normal file
@ -0,0 +1,51 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 32
|
||||
|
||||
int main1 ()
|
||||
{
|
||||
int i;
|
||||
unsigned int ia[N];
|
||||
unsigned int ic[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
unsigned int ib[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
unsigned short sa[N];
|
||||
unsigned short sc[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
unsigned short sb[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
unsigned char ca[N];
|
||||
unsigned char cc[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
unsigned char cb[N] = {0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
|
||||
/* Multiple types with different sizes, used in independent
|
||||
computations. Vectorizable. All accesses aligned. */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
ia[i] = ib[i] + ic[i];
|
||||
sa[i] = sb[i] + sc[i];
|
||||
ca[i] = cb[i] + cc[i];
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (ia[i] != ib[i] + ic[i]
|
||||
|| sa[i] != sb[i] + sc[i]
|
||||
|| ca[i] != cb[i] + cc[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
return main1 ();
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
64
gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c
Normal file
64
gcc/testsuite/gcc.dg/vect/vect-multitypes-6.c
Normal file
@ -0,0 +1,64 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 32
|
||||
|
||||
unsigned int ic[N] __attribute__ ((__aligned__(16))) =
|
||||
{0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
unsigned int ib[N] __attribute__ ((__aligned__(16))) =
|
||||
{0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
unsigned short sc[N] __attribute__ ((__aligned__(16))) =
|
||||
{0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
unsigned short sb[N] __attribute__ ((__aligned__(16))) =
|
||||
{0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
unsigned char cc[N] __attribute__ ((__aligned__(16))) =
|
||||
{0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
unsigned char cb[N] __attribute__ ((__aligned__(16))) =
|
||||
{0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45,0,3,6,9,12,15,18,21,24,27,30,33,36,39,42,45};
|
||||
|
||||
int main1 (int n,
|
||||
unsigned int * __restrict__ pic, unsigned int * __restrict__ pib,
|
||||
unsigned short * __restrict__ psc, unsigned short * __restrict__ psb,
|
||||
unsigned char * __restrict__ pcc, unsigned char * __restrict__ pcb)
|
||||
{
|
||||
int i;
|
||||
unsigned int ia[N];
|
||||
unsigned short sa[N];
|
||||
unsigned char ca[N];
|
||||
|
||||
/* Multiple types with different sizes, used in independent
|
||||
computations. Vectorizable. The loads are misaligned. */
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
ia[i] = pib[i] + pic[i];
|
||||
sa[i] = psb[i] + psc[i];
|
||||
ca[i] = pcb[i] + pcc[i];
|
||||
}
|
||||
|
||||
/* check results: */
|
||||
for (i = 0; i < n; i++)
|
||||
{
|
||||
if (ia[i] != pib[i] + pic[i]
|
||||
|| sa[i] != psb[i] + psc[i]
|
||||
|| ca[i] != pcb[i] + pcc[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
check_vect ();
|
||||
|
||||
main1 (N, ic, ib, sc, sb, cc, cb);
|
||||
main1 (N-3, ic, ib, &sc[1], sb, cc, &cb[2]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail vect_no_align } } } */
|
||||
/* { dg-final { scan-tree-dump-times "Vectorizing an unaligned access" 6 "vect" {xfail vect_no_align } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
51
gcc/testsuite/gcc.dg/vect/vect-multitypes-7.c
Normal file
51
gcc/testsuite/gcc.dg/vect/vect-multitypes-7.c
Normal file
@ -0,0 +1,51 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#define N 64
|
||||
|
||||
#define DOT1 43680
|
||||
#define DOT2 -20832
|
||||
|
||||
signed short X[N] __attribute__ ((__aligned__(16)));
|
||||
signed short Y[N] __attribute__ ((__aligned__(16)));
|
||||
unsigned char CX[N] __attribute__ ((__aligned__(16)));
|
||||
|
||||
void
|
||||
foo1(int len) {
|
||||
int i;
|
||||
int result1 = 0;
|
||||
short prod;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
result1 += (X[i] * Y[i]);
|
||||
CX[i] = 5;
|
||||
}
|
||||
|
||||
if (result1 != DOT1)
|
||||
abort ();
|
||||
}
|
||||
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i, dot1, dot2;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
X[i] = i;
|
||||
Y[i] = 64-i;
|
||||
CX[i] = i;
|
||||
}
|
||||
|
||||
foo1 (N);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_sdot_hi } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
50
gcc/testsuite/gcc.dg/vect/vect-multitypes-8.c
Normal file
50
gcc/testsuite/gcc.dg/vect/vect-multitypes-8.c
Normal file
@ -0,0 +1,50 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
|
||||
unsigned char uX[N] __attribute__ ((__aligned__(16)));
|
||||
unsigned char uresultX[N];
|
||||
unsigned int uY[N] __attribute__ ((__aligned__(16)));
|
||||
unsigned short uresultY[N];
|
||||
|
||||
/* Unsigned type demotion (si->hi) */
|
||||
|
||||
int
|
||||
foo1(int len) {
|
||||
int i;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
uresultX[i] = uX[i];
|
||||
uresultY[i] = (unsigned short)uY[i];
|
||||
}
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
uX[i] = 16-i;
|
||||
uY[i] = 16-i;
|
||||
}
|
||||
|
||||
foo1 (N);
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
if (uresultX[i] != uX[i])
|
||||
abort ();
|
||||
if (uresultY[i] != (unsigned short)uY[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_pack_mod } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
63
gcc/testsuite/gcc.dg/vect/vect-multitypes-9.c
Normal file
63
gcc/testsuite/gcc.dg/vect/vect-multitypes-9.c
Normal file
@ -0,0 +1,63 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
|
||||
unsigned char uX[N] __attribute__ ((__aligned__(16)));
|
||||
unsigned short uresult[N];
|
||||
signed char X[N] __attribute__ ((__aligned__(16)));
|
||||
short result[N];
|
||||
|
||||
/* Unsigned type promotion (qi->hi) */
|
||||
int
|
||||
foo1(int len) {
|
||||
int i;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
uresult[i] = (unsigned short)uX[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* Signed type promotion (qi->hi) */
|
||||
int
|
||||
foo2(int len) {
|
||||
int i;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
result[i] = (short)X[i];
|
||||
}
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
X[i] = 16-i;
|
||||
uX[i] = 16-i;
|
||||
}
|
||||
|
||||
foo1 (N);
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
if (uresult[i] != (unsigned short)uX[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
foo2 (N);
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
if (result[i] != (short)X[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_unpack } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
@ -50,5 +50,6 @@ main (void)
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_sdot_hi } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
||||
|
57
gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c
Normal file
57
gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8a.c
Normal file
@ -0,0 +1,57 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
|
||||
#define DOT1 43680
|
||||
|
||||
signed char X[N] __attribute__ ((__aligned__(16)));
|
||||
signed char Y[N] __attribute__ ((__aligned__(16)));
|
||||
|
||||
/* char->short->int dot product.
|
||||
The dot-product pattern should be detected.
|
||||
Vectorizable on vect_sdot_qi targets (targets that support dot-product of
|
||||
signed chars).
|
||||
|
||||
In the future could also be vectorized as widening-mult + widening-summation,
|
||||
or with type-conversion support.
|
||||
*/
|
||||
int
|
||||
foo1(int len) {
|
||||
int i;
|
||||
int result = 0;
|
||||
short prod;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
prod = X[i] * Y[i];
|
||||
result += prod;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i, dot1;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
X[i] = i;
|
||||
Y[i] = 64-i;
|
||||
}
|
||||
|
||||
dot1 = foo1 (N);
|
||||
if (dot1 != DOT1)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_sdot_qi } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_qi_to_hi && vect_widen_sum_hi_to_si } } } } */
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
@ -5,34 +5,11 @@
|
||||
|
||||
#define N 64
|
||||
|
||||
#define DOT1 43680
|
||||
#define DOT2 -21856
|
||||
#define DOT3 43680
|
||||
|
||||
signed char X[N] __attribute__ ((__aligned__(16)));
|
||||
signed char Y[N] __attribute__ ((__aligned__(16)));
|
||||
|
||||
/* char->short->int dot product.
|
||||
The dot-product pattern should be detected.
|
||||
Vectorizable on vect_sdot_qi targets (targets that support dot-product of
|
||||
signed chars).
|
||||
|
||||
In the future could also be vectorized as widening-mult + widening-summation,
|
||||
or with type-conversion support.
|
||||
*/
|
||||
int
|
||||
foo1(int len) {
|
||||
int i;
|
||||
int result = 0;
|
||||
short prod;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
prod = X[i] * Y[i];
|
||||
result += prod;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* char->short->short dot product.
|
||||
The dot-product pattern should be detected.
|
||||
The reduction is currently not vectorized because of the signed->unsigned->signed
|
||||
@ -45,9 +22,8 @@ foo1(int len) {
|
||||
When the dot-product is detected, the loop should be vectorized on vect_sdot_qi
|
||||
targets (targets that support dot-product of signed char).
|
||||
This test would currently fail to vectorize on targets that support
|
||||
dot-product of chars when the accumulator is int.
|
||||
|
||||
In the future could also be vectorized as widening-mult + summation,
|
||||
dot-product of chars into an int accumulator.
|
||||
Alternatively, the loop could also be vectorized as widening-mult + summation,
|
||||
or with type-conversion support.
|
||||
*/
|
||||
short
|
||||
@ -61,23 +37,9 @@ foo2(int len) {
|
||||
return result;
|
||||
}
|
||||
|
||||
/* char->int->int dot product.
|
||||
Not detected as a dot-product pattern.
|
||||
Currently fails to be vectorized due to presence of type conversions. */
|
||||
int
|
||||
foo3(int len) {
|
||||
int i;
|
||||
int result = 0;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
result += (X[i] * Y[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i, dot1, dot3;
|
||||
int i;
|
||||
short dot2;
|
||||
|
||||
check_vect ();
|
||||
@ -87,25 +49,16 @@ int main (void)
|
||||
Y[i] = 64-i;
|
||||
}
|
||||
|
||||
dot1 = foo1 (N);
|
||||
if (dot1 != DOT1)
|
||||
abort ();
|
||||
|
||||
dot2 = foo2 (N);
|
||||
if (dot2 != DOT2)
|
||||
abort ();
|
||||
|
||||
dot3 = foo3 (N);
|
||||
if (dot3 != DOT3)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 2 "vect" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" } } */
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_sdot_qi } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
47
gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8c.c
Normal file
47
gcc/testsuite/gcc.dg/vect/vect-reduc-dot-s8c.c
Normal file
@ -0,0 +1,47 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
|
||||
#define DOT3 43680
|
||||
|
||||
signed char X[N] __attribute__ ((__aligned__(16)));
|
||||
signed char Y[N] __attribute__ ((__aligned__(16)));
|
||||
|
||||
/* char->int->int dot product.
|
||||
Not detected as a dot-product pattern.
|
||||
Currently fails to be vectorized due to presence of type conversions. */
|
||||
int
|
||||
foo3(int len) {
|
||||
int i;
|
||||
int result = 0;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
result += (X[i] * Y[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i, dot3;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
X[i] = i;
|
||||
Y[i] = 64-i;
|
||||
}
|
||||
|
||||
dot3 = foo3 (N);
|
||||
if (dot3 != DOT3)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
52
gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u16a.c
Normal file
52
gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u16a.c
Normal file
@ -0,0 +1,52 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
|
||||
#define DOT1 43680
|
||||
#define DOT2 43680
|
||||
|
||||
unsigned short X[N] __attribute__ ((__aligned__(16)));
|
||||
unsigned short Y[N] __attribute__ ((__aligned__(16)));
|
||||
|
||||
/* short->short->int dot product.
|
||||
Not detected as a dot-product pattern.
|
||||
Requires support for non-widening multiplication and widening-summation. */
|
||||
unsigned int
|
||||
foo1(int len) {
|
||||
int i;
|
||||
unsigned int result = 0;
|
||||
unsigned short prod;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
prod = X[i] * Y[i];
|
||||
result += prod;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
unsigned int dot1;
|
||||
int i;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
X[i] = i;
|
||||
Y[i] = 64-i;
|
||||
}
|
||||
|
||||
dot1 = foo1 (N);
|
||||
if (dot1 != DOT1)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_short_mult && vect_widen_sum_hi_to_si } } } } */
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
@ -5,28 +5,11 @@
|
||||
|
||||
#define N 64
|
||||
|
||||
#define DOT1 43680
|
||||
#define DOT2 43680
|
||||
|
||||
unsigned short X[N] __attribute__ ((__aligned__(16)));
|
||||
unsigned short Y[N] __attribute__ ((__aligned__(16)));
|
||||
|
||||
/* short->short->int dot product.
|
||||
Not detected as a dot-product pattern.
|
||||
Not vectorized due to presence of type-conversions. */
|
||||
unsigned int
|
||||
foo1(int len) {
|
||||
int i;
|
||||
unsigned int result = 0;
|
||||
unsigned short prod;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
prod = X[i] * Y[i];
|
||||
result += prod;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* short->int->int dot product.
|
||||
Currently not detected as a dot-product pattern: the multiplication
|
||||
promotes the ushorts to int, and then the product is promoted to unsigned
|
||||
@ -46,7 +29,7 @@ foo2(int len) {
|
||||
|
||||
int main (void)
|
||||
{
|
||||
unsigned int dot1, dot2;
|
||||
unsigned int dot2;
|
||||
int i;
|
||||
|
||||
check_vect ();
|
||||
@ -56,10 +39,6 @@ int main (void)
|
||||
Y[i] = 64-i;
|
||||
}
|
||||
|
||||
dot1 = foo1 (N);
|
||||
if (dot1 != DOT1)
|
||||
abort ();
|
||||
|
||||
dot2 = foo2 (N);
|
||||
if (dot2 != DOT2)
|
||||
abort ();
|
||||
@ -69,9 +48,9 @@ int main (void)
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" { xfail *-*-* } } } */
|
||||
|
||||
/* Once the dot-product pattern is detected in the second loop, we expect
|
||||
/* Once the dot-product pattern is detected, we expect
|
||||
that loop to be vectorized on vect_udot_hi targets (targets that support
|
||||
dot-product of unsigned shorts). */
|
||||
dot-product of unsigned shorts) and targets that support widening multiplication. */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
@ -1,101 +0,0 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
|
||||
#define DOT1 43680
|
||||
#define DOT2 43680
|
||||
#define DOT3 43680
|
||||
|
||||
unsigned char X[N] __attribute__ ((__aligned__(16)));
|
||||
unsigned char Y[N] __attribute__ ((__aligned__(16)));
|
||||
|
||||
/* char->short->int dot product.
|
||||
Detected as a dot-product pattern.
|
||||
Should be vectorized on targets that support dot-product for unsigned chars.
|
||||
*/
|
||||
unsigned int
|
||||
foo1(int len) {
|
||||
int i;
|
||||
unsigned int result = 0;
|
||||
unsigned short prod;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
prod = X[i] * Y[i];
|
||||
result += prod;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* char->short->short dot product.
|
||||
Detected as a dot-product pattern.
|
||||
Should be vectorized on targets that support dot-product for unsigned chars.
|
||||
This test currently fails to vectorize on targets that support dot-product
|
||||
of chars only when the accumulator is int.
|
||||
*/
|
||||
unsigned short
|
||||
foo2(int len) {
|
||||
int i;
|
||||
unsigned short result = 0;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
result += (unsigned short)(X[i] * Y[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* char->int->int dot product.
|
||||
Not detected as a dot-product.
|
||||
Doesn't get vectorized due to presence of type conversions. */
|
||||
unsigned int
|
||||
foo3(int len) {
|
||||
int i;
|
||||
unsigned int result = 0;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
result += (X[i] * Y[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
unsigned int dot1, dot3;
|
||||
unsigned short dot2;
|
||||
int i;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
X[i] = i;
|
||||
Y[i] = 64-i;
|
||||
}
|
||||
|
||||
dot1 = foo1 (N);
|
||||
if (dot1 != DOT1)
|
||||
abort ();
|
||||
|
||||
dot2 = foo2 (N);
|
||||
if (dot2 != DOT2)
|
||||
abort ();
|
||||
|
||||
dot3 = foo3 (N);
|
||||
if (dot3 != DOT3)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 2 "vect" } } */
|
||||
|
||||
/* When the vectorizer is enhanced to vectorize foo2 (accumulation into short) for
|
||||
targets that support accumulation into int (powerpc, ia64) we'd have:
|
||||
dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_udot_qi } }
|
||||
*/
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_udot_qi } } } */
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
61
gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c
Normal file
61
gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8a.c
Normal file
@ -0,0 +1,61 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
|
||||
#define DOT 43680
|
||||
|
||||
unsigned char X[N] __attribute__ ((__aligned__(16)));
|
||||
unsigned char Y[N] __attribute__ ((__aligned__(16)));
|
||||
|
||||
/* char->short->int dot product.
|
||||
Detected as a dot-product pattern.
|
||||
Should be vectorized on targets that support dot-product for unsigned chars
|
||||
(vect_udot_qi),
|
||||
and on targets that support widening-multiplication and widening-summation
|
||||
(vect_widen_mult_qi_to_hi && vect_widen_sum_qi_to_si).
|
||||
Widening-multiplication can also be supported by type promotion and non-widening
|
||||
multiplication (vect_unpack && vect_short_mult);
|
||||
Widening summation can also be supported by type promotion and non-widening
|
||||
summation (vect_unpack).
|
||||
*/
|
||||
unsigned int
|
||||
foo (int len) {
|
||||
int i;
|
||||
unsigned int result = 0;
|
||||
unsigned short prod;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
prod = X[i] * Y[i];
|
||||
result += prod;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
unsigned int dot;
|
||||
int i;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
X[i] = i;
|
||||
Y[i] = 64-i;
|
||||
}
|
||||
|
||||
dot = foo (N);
|
||||
if (dot != DOT)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_udot_qi } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { vect_widen_mult_qi_to_hi && vect_widen_sum_qi_to_si } } } } */
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
60
gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8b.c
Normal file
60
gcc/testsuite/gcc.dg/vect/vect-reduc-dot-u8b.c
Normal file
@ -0,0 +1,60 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
|
||||
#define DOT 43680
|
||||
|
||||
unsigned char X[N] __attribute__ ((__aligned__(16)));
|
||||
unsigned char Y[N] __attribute__ ((__aligned__(16)));
|
||||
|
||||
/* char->short->short dot product.
|
||||
Detected as a dot-product pattern.
|
||||
Should be vectorized on targets that support dot-product for unsigned chars,
|
||||
but currently this test cannot be vectorized as a dot-product on targets
|
||||
that support char->short->int dot-product.
|
||||
Alternatively, this test can be vectorized using vect_widen_mult_qi_to_hi (or
|
||||
vect_unpack and non-widening multiplication: vect_unpack && vect_short_mult).
|
||||
*/
|
||||
unsigned short
|
||||
foo (int len) {
|
||||
int i;
|
||||
unsigned short result = 0;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
result += (unsigned short)(X[i] * Y[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
unsigned short dot;
|
||||
int i;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
X[i] = i;
|
||||
Y[i] = 64-i;
|
||||
}
|
||||
|
||||
dot = foo (N);
|
||||
if (dot != DOT)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" } } */
|
||||
|
||||
/* When the vectorizer is enhanced to vectorize accumulation into short for
|
||||
targets that support accumulation into int (powerpc, ia64) we'd have:
|
||||
dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_udot_qi || vect_widen_mult_qi_to_hi } }
|
||||
*/
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" {target vect_widen_mult_qi_to_hi} } } */
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
45
gcc/testsuite/gcc.dg/vect/vect-widen-mult-s16.c
Normal file
45
gcc/testsuite/gcc.dg/vect/vect-widen-mult-s16.c
Normal file
@ -0,0 +1,45 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
|
||||
short X[N] __attribute__ ((__aligned__(16)));
|
||||
short Y[N] __attribute__ ((__aligned__(16)));
|
||||
int result[N];
|
||||
|
||||
/* short->int widening-mult */
|
||||
int
|
||||
foo1(int len) {
|
||||
int i;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
result[i] = X[i] * Y[i];
|
||||
}
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
X[i] = i;
|
||||
Y[i] = 64-i;
|
||||
}
|
||||
|
||||
foo1 (N);
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
if (result[i] != X[i] * Y[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
45
gcc/testsuite/gcc.dg/vect/vect-widen-mult-s8.c
Normal file
45
gcc/testsuite/gcc.dg/vect/vect-widen-mult-s8.c
Normal file
@ -0,0 +1,45 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
|
||||
signed char X[N] __attribute__ ((__aligned__(16)));
|
||||
signed char Y[N] __attribute__ ((__aligned__(16)));
|
||||
short result[N];
|
||||
|
||||
/* char->short widening-mult */
|
||||
int
|
||||
foo1(int len) {
|
||||
int i;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
result[i] = X[i] * Y[i];
|
||||
}
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
X[i] = i;
|
||||
Y[i] = 64-i;
|
||||
}
|
||||
|
||||
foo1 (N);
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
if (result[i] != X[i] * Y[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_qi_to_hi } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
45
gcc/testsuite/gcc.dg/vect/vect-widen-mult-sum.c
Normal file
45
gcc/testsuite/gcc.dg/vect/vect-widen-mult-sum.c
Normal file
@ -0,0 +1,45 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
#define SUM 0
|
||||
|
||||
/* Require widening-mult or data-unpacking (for the type promotion). */
|
||||
int
|
||||
main1 (short *in, int off, short scale, int n)
|
||||
{
|
||||
int i;
|
||||
int sum = 0;
|
||||
|
||||
for (i = 0; i < n; i++) {
|
||||
sum += ((int) in[i] * (int) in[i+off]) >> scale;
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i;
|
||||
int sum;
|
||||
short X[N];
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
X[i] = 16-i;
|
||||
}
|
||||
|
||||
sum = main1 (X, 1, 16, N-1);
|
||||
|
||||
if (sum != SUM)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_hi_to_si } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
47
gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c
Normal file
47
gcc/testsuite/gcc.dg/vect/vect-widen-mult-u16.c
Normal file
@ -0,0 +1,47 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
|
||||
unsigned short X[N] __attribute__ ((__aligned__(16)));
|
||||
unsigned short Y[N] __attribute__ ((__aligned__(16)));
|
||||
unsigned int result[N];
|
||||
|
||||
/* short->int widening-mult */
|
||||
int
|
||||
foo1(int len) {
|
||||
int i;
|
||||
|
||||
/* Not vectorized because X[i] and Y[i] are cast to 'int'
|
||||
so the widening multiplication pattern is not recognized. */
|
||||
for (i=0; i<len; i++) {
|
||||
result[i] = (unsigned int)(X[i] * Y[i]);
|
||||
}
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
X[i] = i;
|
||||
Y[i] = 64-i;
|
||||
}
|
||||
|
||||
foo1 (N);
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
if (result[i] != X[i] * Y[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { xfail *-*-* } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
45
gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c
Normal file
45
gcc/testsuite/gcc.dg/vect/vect-widen-mult-u8.c
Normal file
@ -0,0 +1,45 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
|
||||
unsigned char X[N] __attribute__ ((__aligned__(16)));
|
||||
unsigned char Y[N] __attribute__ ((__aligned__(16)));
|
||||
unsigned short result[N];
|
||||
|
||||
/* char->short widening-mult */
|
||||
int
|
||||
foo1(int len) {
|
||||
int i;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
result[i] = X[i] * Y[i];
|
||||
}
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
X[i] = i;
|
||||
Y[i] = 64-i;
|
||||
}
|
||||
|
||||
foo1 (N);
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
if (result[i] != X[i] * Y[i])
|
||||
abort ();
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_qi_to_hi } } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
@ -1,108 +0,0 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
|
||||
#define DOT1 43680
|
||||
#define DOT2 -21856
|
||||
#define DOT3 43680
|
||||
|
||||
signed char X[N] __attribute__ ((__aligned__(16)));
|
||||
signed char Y[N] __attribute__ ((__aligned__(16)));
|
||||
|
||||
/* char->short->int dot product.
|
||||
The dot-product pattern should be detected.
|
||||
Vectorizable on vect_sdot_qi targets (targets that support dot-product of
|
||||
signed chars).
|
||||
|
||||
In the future could also be vectorized as widening-mult + widening-summation,
|
||||
or with type-conversion support.
|
||||
*/
|
||||
int
|
||||
foo1(int len) {
|
||||
int i;
|
||||
int result = 0;
|
||||
short prod;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
prod = X[i] * Y[i];
|
||||
result += prod;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* char->short->short dot product.
|
||||
The dot-product pattern should be detected.
|
||||
Should be vectorized on vect_sdot_qi targets (targets that support
|
||||
dot-product of signed char).
|
||||
This test currently fails to vectorize on targets that support
|
||||
dot-product of chars when the accumulator is int.
|
||||
|
||||
In the future could also be vectorized as widening-mult + summation,
|
||||
or with type-conversion support.
|
||||
*/
|
||||
short
|
||||
foo2(int len) {
|
||||
int i;
|
||||
short result = 0;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
result += (X[i] * Y[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/* char->int->int dot product.
|
||||
Not detected as a dot-product pattern.
|
||||
Currently fails to be vectorized due to presence of type conversions. */
|
||||
int
|
||||
foo3(int len) {
|
||||
int i;
|
||||
int result = 0;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
result += (X[i] * Y[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i, dot1, dot3;
|
||||
short dot2;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
X[i] = i;
|
||||
Y[i] = 64-i;
|
||||
}
|
||||
|
||||
dot1 = foo1 (N);
|
||||
if (dot1 != DOT1)
|
||||
abort ();
|
||||
|
||||
dot2 = foo2 (N);
|
||||
if (dot2 != DOT2)
|
||||
abort ();
|
||||
|
||||
dot3 = foo3 (N);
|
||||
if (dot3 != DOT3)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 2 "vect" } } */
|
||||
|
||||
/* When vectorizer is enhanced to vectorize foo2 (accumulation into short) for targets
|
||||
that support accumulation into int (ia64) we'd have:
|
||||
dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { target vect_sdot_qi } }
|
||||
*/
|
||||
/* In the meantime expect: */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 2 "vect" { xfail *-*-* } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_sdot_qi } } } */
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
62
gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c
Normal file
62
gcc/testsuite/gcc.dg/vect/wrapv-vect-reduc-dot-s8b.c
Normal file
@ -0,0 +1,62 @@
|
||||
/* { dg-require-effective-target vect_int } */
|
||||
|
||||
#include <stdarg.h>
|
||||
#include "tree-vect.h"
|
||||
|
||||
#define N 64
|
||||
|
||||
#define DOT -21856
|
||||
|
||||
signed char X[N] __attribute__ ((__aligned__(16)));
|
||||
signed char Y[N] __attribute__ ((__aligned__(16)));
|
||||
|
||||
/* char->short->short dot product.
|
||||
The dot-product pattern should be detected.
|
||||
Should be vectorized on vect_sdot_qi targets (targets that support
|
||||
dot-product of signed char).
|
||||
This test currently fails to vectorize on targets that support
|
||||
dot-product of chars into an int accumulator.
|
||||
Can also be vectorized as widening-mult + summation,
|
||||
or with type-conversion support.
|
||||
*/
|
||||
short
|
||||
foo(int len) {
|
||||
int i;
|
||||
short result = 0;
|
||||
|
||||
for (i=0; i<len; i++) {
|
||||
result += (X[i] * Y[i]);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
int main (void)
|
||||
{
|
||||
int i;
|
||||
short dot;
|
||||
|
||||
check_vect ();
|
||||
|
||||
for (i=0; i<N; i++) {
|
||||
X[i] = i;
|
||||
Y[i] = 64-i;
|
||||
}
|
||||
|
||||
dot = foo (N);
|
||||
if (dot != DOT)
|
||||
abort ();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_dot_prod_pattern: detected" 1 "vect" } } */
|
||||
/* { dg-final { scan-tree-dump-times "vect_recog_widen_mult_pattern: detected" 1 "vect" } } */
|
||||
|
||||
/* When vectorizer is enhanced to vectorize accumulation into short for targets
|
||||
that support accumulation into int (e.g. ia64) we'd have:
|
||||
dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_sdot_qi } }
|
||||
*/
|
||||
/* In the meantime expect: */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_widen_mult_qi_to_hi } } } */
|
||||
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
@ -1491,18 +1491,19 @@ proc check_effective_target_vect_no_bitwise { } {
|
||||
|
||||
# Return 1 if the target plus current options supports a vector
|
||||
# widening summation of *short* args into *int* result, 0 otherwise.
|
||||
# A target can also support this widening summation if it can support
|
||||
# promotion (unpacking) from shorts to ints.
|
||||
#
|
||||
# This won't change for different subtargets so cache the result.
|
||||
|
||||
proc check_effective_target_vect_widen_sum_hi_to_si { } {
|
||||
global et_vect_widen_sum_hi_to_si
|
||||
|
||||
|
||||
if [info exists et_vect_widen_sum_hi_to_si_saved] {
|
||||
verbose "check_effective_target_vect_widen_sum_hi_to_si: using cached result" 2
|
||||
} else {
|
||||
set et_vect_widen_sum_hi_to_si_saved 0
|
||||
if { [istarget powerpc*-*-*]
|
||||
|| [istarget ia64-*-*] } {
|
||||
set et_vect_widen_sum_hi_to_si_saved [check_effective_target_vect_unpack]
|
||||
if { [istarget powerpc*-*-*] } {
|
||||
set et_vect_widen_sum_hi_to_si_saved 1
|
||||
}
|
||||
}
|
||||
@ -1512,19 +1513,21 @@ proc check_effective_target_vect_widen_sum_hi_to_si { } {
|
||||
|
||||
# Return 1 if the target plus current options supports a vector
|
||||
# widening summation of *char* args into *short* result, 0 otherwise.
|
||||
# A target can also support this widening summation if it can support
|
||||
# promotion (unpacking) from chars to shorts.
|
||||
#
|
||||
# This won't change for different subtargets so cache the result.
|
||||
|
||||
proc check_effective_target_vect_widen_sum_qi_to_hi { } {
|
||||
global et_vect_widen_sum_qi_to_hi
|
||||
|
||||
|
||||
if [info exists et_vect_widen_sum_qi_to_hi_saved] {
|
||||
verbose "check_effective_target_vect_widen_sum_qi_to_hi: using cached result" 2
|
||||
} else {
|
||||
set et_vect_widen_sum_qi_to_hi_saved 0
|
||||
if { [istarget ia64-*-*] } {
|
||||
if { [check_effective_target_vect_unpack] } {
|
||||
set et_vect_widen_sum_qi_to_hi_saved 1
|
||||
}
|
||||
}
|
||||
}
|
||||
verbose "check_effective_target_vect_widen_sum_qi_to_hi: returning $et_vect_widen_sum_qi_to_hi_saved" 2
|
||||
return $et_vect_widen_sum_qi_to_hi_saved
|
||||
@ -1537,7 +1540,7 @@ proc check_effective_target_vect_widen_sum_qi_to_hi { } {
|
||||
|
||||
proc check_effective_target_vect_widen_sum_qi_to_si { } {
|
||||
global et_vect_widen_sum_qi_to_si
|
||||
|
||||
|
||||
if [info exists et_vect_widen_sum_qi_to_si_saved] {
|
||||
verbose "check_effective_target_vect_widen_sum_qi_to_si: using cached result" 2
|
||||
} else {
|
||||
@ -1551,24 +1554,61 @@ proc check_effective_target_vect_widen_sum_qi_to_si { } {
|
||||
}
|
||||
|
||||
# Return 1 if the target plus current options supports a vector
|
||||
# widening summation, 0 otherwise.
|
||||
# widening multiplication of *char* args into *short* result, 0 otherwise.
|
||||
# A target can also support this widening multiplication if it can support
|
||||
# promotion (unpacking) from chars to shorts, and vect_short_mult (non-widening
|
||||
# multiplication of shorts).
|
||||
#
|
||||
# This won't change for different subtargets so cache the result.
|
||||
|
||||
proc check_effective_target_vect_widen_sum { } {
|
||||
global et_vect_widen_sum
|
||||
|
||||
if [info exists et_vect_widen_sum_saved] {
|
||||
verbose "check_effective_target_vect_widen_sum: using cached result" 2
|
||||
|
||||
|
||||
proc check_effective_target_vect_widen_mult_qi_to_hi { } {
|
||||
global et_vect_widen_mult_qi_to_hi
|
||||
|
||||
if [info exists et_vect_widen_mult_qi_to_hi_saved] {
|
||||
verbose "check_effective_target_vect_widen_mult_qi_to_hi: using cached result" 2
|
||||
} else {
|
||||
set et_vect_widen_sum_saved 0
|
||||
if { [istarget powerpc*-*-*]
|
||||
|| [istarget ia64-*-*] } {
|
||||
set et_vect_widen_sum_saved 1
|
||||
if { [check_effective_target_vect_unpack]
|
||||
&& [check_effective_target_vect_short_mult] } {
|
||||
set et_vect_widen_mult_qi_to_hi_saved 1
|
||||
} else {
|
||||
set et_vect_widen_mult_qi_to_hi_saved 0
|
||||
}
|
||||
if { [istarget powerpc*-*-*] } {
|
||||
set et_vect_widen_mult_qi_to_hi_saved 1
|
||||
}
|
||||
}
|
||||
verbose "check_effective_target_vect_widen_sum: returning $et_vect_widen_sum_saved" 2
|
||||
return $et_vect_widen_sum_saved
|
||||
verbose "check_effective_target_vect_widen_mult_qi_to_hi: returning $et_vect_widen_mult_qi_to_hi_saved" 2
|
||||
return $et_vect_widen_mult_qi_to_hi_saved
|
||||
}
|
||||
|
||||
# Return 1 if the target plus current options supports a vector
|
||||
# widening multiplication of *short* args into *int* result, 0 otherwise.
|
||||
# A target can also support this widening multiplication if it can support
|
||||
# promotion (unpacking) from shorts to ints, and vect_int_mult (non-widening
|
||||
# multiplication of ints).
|
||||
#
|
||||
# This won't change for different subtargets so cache the result.
|
||||
|
||||
|
||||
proc check_effective_target_vect_widen_mult_hi_to_si { } {
|
||||
global et_vect_widen_mult_hi_to_si
|
||||
|
||||
if [info exists et_vect_widen_mult_hi_to_si_saved] {
|
||||
verbose "check_effective_target_vect_widen_mult_hi_to_si: using cached result" 2
|
||||
} else {
|
||||
if { [check_effective_target_vect_unpack]
|
||||
&& [check_effective_target_vect_int_mult] } {
|
||||
set et_vect_widen_mult_hi_to_si_saved 1
|
||||
} else {
|
||||
set et_vect_widen_mult_hi_to_si_saved 0
|
||||
}
|
||||
if { [istarget powerpc*-*-*] } {
|
||||
set et_vect_widen_mult_hi_to_si_saved 1
|
||||
}
|
||||
}
|
||||
verbose "check_effective_target_vect_widen_mult_hi_to_si: returning $et_vect_widen_mult_hi_to_si_saved" 2
|
||||
return $et_vect_widen_mult_hi_to_si_saved
|
||||
}
|
||||
|
||||
# Return 1 if the target plus current options supports a vector
|
||||
@ -1583,9 +1623,6 @@ proc check_effective_target_vect_sdot_qi { } {
|
||||
verbose "check_effective_target_vect_sdot_qi: using cached result" 2
|
||||
} else {
|
||||
set et_vect_sdot_qi_saved 0
|
||||
if { [istarget ia64-*-*] } {
|
||||
set et_vect_sdot_qi_saved 1
|
||||
}
|
||||
}
|
||||
verbose "check_effective_target_vect_sdot_qi: returning $et_vect_sdot_qi_saved" 2
|
||||
return $et_vect_sdot_qi_saved
|
||||
@ -1603,8 +1640,7 @@ proc check_effective_target_vect_udot_qi { } {
|
||||
verbose "check_effective_target_vect_udot_qi: using cached result" 2
|
||||
} else {
|
||||
set et_vect_udot_qi_saved 0
|
||||
if { [istarget powerpc*-*-*]
|
||||
|| [istarget ia64-*-*] } {
|
||||
if { [istarget powerpc*-*-*] } {
|
||||
set et_vect_udot_qi_saved 1
|
||||
}
|
||||
}
|
||||
@ -1626,8 +1662,7 @@ proc check_effective_target_vect_sdot_hi { } {
|
||||
set et_vect_sdot_hi_saved 0
|
||||
if { [istarget powerpc*-*-*]
|
||||
|| [istarget i?86-*-*]
|
||||
|| [istarget x86_64-*-*]
|
||||
|| [istarget ia64-*-*] } {
|
||||
|| [istarget x86_64-*-*] } {
|
||||
set et_vect_sdot_hi_saved 1
|
||||
}
|
||||
}
|
||||
@ -1656,6 +1691,51 @@ proc check_effective_target_vect_udot_hi { } {
|
||||
}
|
||||
|
||||
|
||||
# Return 1 if the target plus current options supports a vector
|
||||
# demotion (packing) of shorts (to chars) and ints (to shorts)
|
||||
# using modulo arithmetic, 0 otherwise.
|
||||
#
|
||||
# This won't change for different subtargets so cache the result.
|
||||
|
||||
proc check_effective_target_vect_pack_mod { } {
|
||||
global et_vect_pack_mod
|
||||
|
||||
if [info exists et_vect_pack_mod_saved] {
|
||||
verbose "check_effective_target_vect_pack_mod: using cached result" 2
|
||||
} else {
|
||||
set et_vect_pack_mod_saved 0
|
||||
if { [istarget powerpc*-*-*]
|
||||
|| [istarget i?86-*-*]
|
||||
|| [istarget x86_64-*-*] } {
|
||||
set et_vect_pack_mod_saved 1
|
||||
}
|
||||
}
|
||||
verbose "check_effective_target_vect_pack_mod: returning $et_vect_pack_mod_saved" 2
|
||||
return $et_vect_pack_mod_saved
|
||||
}
|
||||
|
||||
# Return 1 if the target plus current options supports a vector
|
||||
# promotion (unpacking) of chars (to shorts) and shorts (to ints), 0 otherwise.
|
||||
#
|
||||
# This won't change for different subtargets so cache the result.
|
||||
|
||||
proc check_effective_target_vect_unpack { } {
|
||||
global et_vect_unpack
|
||||
|
||||
if [info exists et_vect_unpack_saved] {
|
||||
verbose "check_effective_target_vect_unpack: using cached result" 2
|
||||
} else {
|
||||
set et_vect_unpack_saved 0
|
||||
if { [istarget powerpc*-*-*]
|
||||
|| [istarget i?86-*-*]
|
||||
|| [istarget x86_64-*-*] } {
|
||||
set et_vect_unpack_saved 1
|
||||
}
|
||||
}
|
||||
verbose "check_effective_target_vect_unpack: returning $et_vect_unpack_saved" 2
|
||||
return $et_vect_unpack_saved
|
||||
}
|
||||
|
||||
# Return 1 if the target plus current options does not support a vector
|
||||
# alignment mechanism, 0 otherwise.
|
||||
#
|
||||
|
@ -1765,6 +1765,12 @@ estimate_num_insns_1 (tree *tp, int *walk_subtrees, void *data)
|
||||
case REDUC_PLUS_EXPR:
|
||||
case WIDEN_SUM_EXPR:
|
||||
case DOT_PROD_EXPR:
|
||||
case VEC_WIDEN_MULT_HI_EXPR:
|
||||
case VEC_WIDEN_MULT_LO_EXPR:
|
||||
case VEC_UNPACK_HI_EXPR:
|
||||
case VEC_UNPACK_LO_EXPR:
|
||||
case VEC_PACK_MOD_EXPR:
|
||||
case VEC_PACK_SAT_EXPR:
|
||||
|
||||
case WIDEN_MULT_EXPR:
|
||||
|
||||
|
@ -1702,9 +1702,9 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
|
||||
case DOT_PROD_EXPR:
|
||||
pp_string (buffer, " DOT_PROD_EXPR < ");
|
||||
dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
|
||||
pp_string (buffer, " , ");
|
||||
pp_string (buffer, ", ");
|
||||
dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
|
||||
pp_string (buffer, " , ");
|
||||
pp_string (buffer, ", ");
|
||||
dump_generic_node (buffer, TREE_OPERAND (node, 2), spc, flags, false);
|
||||
pp_string (buffer, " > ");
|
||||
break;
|
||||
@ -1863,6 +1863,50 @@ dump_generic_node (pretty_printer *buffer, tree node, int spc, int flags,
|
||||
pp_string (buffer, " > ");
|
||||
break;
|
||||
|
||||
case VEC_WIDEN_MULT_HI_EXPR:
|
||||
pp_string (buffer, " VEC_WIDEN_MULT_HI_EXPR < ");
|
||||
dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
|
||||
pp_string (buffer, ", ");
|
||||
dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
|
||||
pp_string (buffer, " > ");
|
||||
break;
|
||||
|
||||
case VEC_WIDEN_MULT_LO_EXPR:
|
||||
pp_string (buffer, " VEC_WIDEN_MULT_LO_EXPR < ");
|
||||
dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
|
||||
pp_string (buffer, ", ");
|
||||
dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
|
||||
pp_string (buffer, " > ");
|
||||
break;
|
||||
|
||||
case VEC_UNPACK_HI_EXPR:
|
||||
pp_string (buffer, " VEC_UNPACK_HI_EXPR < ");
|
||||
dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
|
||||
pp_string (buffer, " > ");
|
||||
break;
|
||||
|
||||
case VEC_UNPACK_LO_EXPR:
|
||||
pp_string (buffer, " VEC_UNPACK_LO_EXPR < ");
|
||||
dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
|
||||
pp_string (buffer, " > ");
|
||||
break;
|
||||
|
||||
case VEC_PACK_MOD_EXPR:
|
||||
pp_string (buffer, " VEC_PACK_MOD_EXPR < ");
|
||||
dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
|
||||
pp_string (buffer, ", ");
|
||||
dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
|
||||
pp_string (buffer, " > ");
|
||||
break;
|
||||
|
||||
case VEC_PACK_SAT_EXPR:
|
||||
pp_string (buffer, " VEC_PACK_SAT_EXPR < ");
|
||||
dump_generic_node (buffer, TREE_OPERAND (node, 0), spc, flags, false);
|
||||
pp_string (buffer, ", ");
|
||||
dump_generic_node (buffer, TREE_OPERAND (node, 1), spc, flags, false);
|
||||
pp_string (buffer, " > ");
|
||||
break;
|
||||
|
||||
case BLOCK:
|
||||
{
|
||||
tree t;
|
||||
@ -2165,6 +2209,8 @@ op_prio (tree op)
|
||||
case MINUS_EXPR:
|
||||
return 12;
|
||||
|
||||
case VEC_WIDEN_MULT_HI_EXPR:
|
||||
case VEC_WIDEN_MULT_LO_EXPR:
|
||||
case WIDEN_MULT_EXPR:
|
||||
case DOT_PROD_EXPR:
|
||||
case MULT_EXPR:
|
||||
@ -2218,6 +2264,10 @@ op_prio (tree op)
|
||||
case REDUC_PLUS_EXPR:
|
||||
case VEC_LSHIFT_EXPR:
|
||||
case VEC_RSHIFT_EXPR:
|
||||
case VEC_UNPACK_HI_EXPR:
|
||||
case VEC_UNPACK_LO_EXPR:
|
||||
case VEC_PACK_MOD_EXPR:
|
||||
case VEC_PACK_SAT_EXPR:
|
||||
return 16;
|
||||
|
||||
case SAVE_EXPR:
|
||||
|
@ -54,8 +54,6 @@ static bool vect_determine_vectorization_factor (loop_vec_info);
|
||||
|
||||
/* Utility functions for the analyses. */
|
||||
static bool exist_non_indexing_operands_for_use_p (tree, tree);
|
||||
static void vect_mark_relevant (VEC(tree,heap) **, tree, bool, bool);
|
||||
static bool vect_stmt_relevant_p (tree, loop_vec_info, bool *, bool *);
|
||||
static tree vect_get_loop_niters (struct loop *, tree *);
|
||||
static bool vect_analyze_data_ref_dependence
|
||||
(struct data_dependence_relation *, loop_vec_info);
|
||||
@ -187,22 +185,9 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "nunits = %d", nunits);
|
||||
|
||||
if (vectorization_factor)
|
||||
{
|
||||
/* FORNOW: don't allow mixed units.
|
||||
This restriction will be relaxed in the future. */
|
||||
if (nunits != vectorization_factor)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS))
|
||||
fprintf (vect_dump, "not vectorized: mixed data-types");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
if (!vectorization_factor
|
||||
|| (nunits > vectorization_factor))
|
||||
vectorization_factor = nunits;
|
||||
|
||||
gcc_assert (GET_MODE_SIZE (TYPE_MODE (scalar_type))
|
||||
* vectorization_factor == UNITS_PER_SIMD_WORD);
|
||||
}
|
||||
}
|
||||
|
||||
@ -310,7 +295,9 @@ vect_analyze_operations (loop_vec_info loop_vinfo)
|
||||
gcc_assert (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (stmt))));
|
||||
gcc_assert (STMT_VINFO_VECTYPE (stmt_info));
|
||||
|
||||
ok = (vectorizable_operation (stmt, NULL, NULL)
|
||||
ok = (vectorizable_type_promotion (stmt, NULL, NULL)
|
||||
|| vectorizable_type_demotion (stmt, NULL, NULL)
|
||||
|| vectorizable_operation (stmt, NULL, NULL)
|
||||
|| vectorizable_assignment (stmt, NULL, NULL)
|
||||
|| vectorizable_load (stmt, NULL, NULL)
|
||||
|| vectorizable_store (stmt, NULL, NULL)
|
||||
@ -588,6 +575,8 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
|
||||
struct data_reference *drb = DDR_B (ddr);
|
||||
stmt_vec_info stmtinfo_a = vinfo_for_stmt (DR_STMT (dra));
|
||||
stmt_vec_info stmtinfo_b = vinfo_for_stmt (DR_STMT (drb));
|
||||
int dra_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dra))));
|
||||
int drb_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (drb))));
|
||||
lambda_vector dist_v;
|
||||
unsigned int loop_depth;
|
||||
|
||||
@ -628,7 +617,7 @@ vect_analyze_data_ref_dependence (struct data_dependence_relation *ddr,
|
||||
fprintf (vect_dump, "dependence distance = %d.", dist);
|
||||
|
||||
/* Same loop iteration. */
|
||||
if (dist % vectorization_factor == 0)
|
||||
if (dist % vectorization_factor == 0 && dra_size == drb_size)
|
||||
{
|
||||
/* Two references with distance zero have the same alignment. */
|
||||
VEC_safe_push (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmtinfo_a), drb);
|
||||
@ -837,12 +826,15 @@ vect_update_misalignment_for_peel (struct data_reference *dr,
|
||||
struct data_reference *dr_peel, int npeel)
|
||||
{
|
||||
unsigned int i;
|
||||
int drsize;
|
||||
VEC(dr_p,heap) *same_align_drs;
|
||||
struct data_reference *current_dr;
|
||||
int dr_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
|
||||
int dr_peel_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr_peel))));
|
||||
|
||||
if (known_alignment_for_access_p (dr)
|
||||
&& DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel))
|
||||
&& known_alignment_for_access_p (dr_peel)
|
||||
&& (DR_MISALIGNMENT (dr)/dr_size ==
|
||||
DR_MISALIGNMENT (dr_peel)/dr_peel_size))
|
||||
{
|
||||
DR_MISALIGNMENT (dr) = 0;
|
||||
return;
|
||||
@ -856,7 +848,8 @@ vect_update_misalignment_for_peel (struct data_reference *dr,
|
||||
{
|
||||
if (current_dr != dr)
|
||||
continue;
|
||||
gcc_assert (DR_MISALIGNMENT (dr) == DR_MISALIGNMENT (dr_peel));
|
||||
gcc_assert (DR_MISALIGNMENT (dr)/dr_size ==
|
||||
DR_MISALIGNMENT (dr_peel)/dr_peel_size);
|
||||
DR_MISALIGNMENT (dr) = 0;
|
||||
return;
|
||||
}
|
||||
@ -864,12 +857,13 @@ vect_update_misalignment_for_peel (struct data_reference *dr,
|
||||
if (known_alignment_for_access_p (dr)
|
||||
&& known_alignment_for_access_p (dr_peel))
|
||||
{
|
||||
drsize = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
|
||||
DR_MISALIGNMENT (dr) += npeel * drsize;
|
||||
DR_MISALIGNMENT (dr) += npeel * dr_size;
|
||||
DR_MISALIGNMENT (dr) %= UNITS_PER_SIMD_WORD;
|
||||
return;
|
||||
}
|
||||
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "Setting misalignment to -1.");
|
||||
DR_MISALIGNMENT (dr) = -1;
|
||||
}
|
||||
|
||||
@ -1014,6 +1008,9 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|
||||
bool do_versioning = false;
|
||||
bool stat;
|
||||
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "=== vect_enhance_data_refs_alignment ===");
|
||||
|
||||
/* While cost model enhancements are expected in the future, the high level
|
||||
view of the code at this time is as follows:
|
||||
|
||||
@ -1080,6 +1077,8 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
|
||||
mis = DR_MISALIGNMENT (dr0);
|
||||
mis /= GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr0))));
|
||||
npeel = LOOP_VINFO_VECT_FACTOR (loop_vinfo) - mis;
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "Try peeling by %d",npeel);
|
||||
}
|
||||
|
||||
/* Ensure that all data refs can be vectorized after the peel. */
|
||||
@ -1423,14 +1422,14 @@ vect_analyze_data_refs (loop_vec_info loop_vinfo)
|
||||
|
||||
static void
|
||||
vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
|
||||
bool relevant_p, bool live_p)
|
||||
enum vect_relevant relevant, bool live_p)
|
||||
{
|
||||
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
||||
bool save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
|
||||
enum vect_relevant save_relevant = STMT_VINFO_RELEVANT (stmt_info);
|
||||
bool save_live_p = STMT_VINFO_LIVE_P (stmt_info);
|
||||
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "mark relevant %d, live %d.",relevant_p, live_p);
|
||||
fprintf (vect_dump, "mark relevant %d, live %d.", relevant, live_p);
|
||||
|
||||
if (STMT_VINFO_IN_PATTERN_P (stmt_info))
|
||||
{
|
||||
@ -1445,20 +1444,21 @@ vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
|
||||
pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
|
||||
stmt_info = vinfo_for_stmt (pattern_stmt);
|
||||
gcc_assert (STMT_VINFO_RELATED_STMT (stmt_info) == stmt);
|
||||
save_relevant_p = STMT_VINFO_RELEVANT_P (stmt_info);
|
||||
save_relevant = STMT_VINFO_RELEVANT (stmt_info);
|
||||
save_live_p = STMT_VINFO_LIVE_P (stmt_info);
|
||||
stmt = pattern_stmt;
|
||||
}
|
||||
|
||||
STMT_VINFO_LIVE_P (stmt_info) |= live_p;
|
||||
STMT_VINFO_RELEVANT_P (stmt_info) |= relevant_p;
|
||||
if (relevant > STMT_VINFO_RELEVANT (stmt_info))
|
||||
STMT_VINFO_RELEVANT (stmt_info) = relevant;
|
||||
|
||||
if (TREE_CODE (stmt) == PHI_NODE)
|
||||
/* Don't put phi-nodes in the worklist. Phis that are marked relevant
|
||||
or live will fail vectorization later on. */
|
||||
return;
|
||||
|
||||
if (STMT_VINFO_RELEVANT_P (stmt_info) == save_relevant_p
|
||||
if (STMT_VINFO_RELEVANT (stmt_info) == save_relevant
|
||||
&& STMT_VINFO_LIVE_P (stmt_info) == save_live_p)
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
@ -1484,7 +1484,7 @@ vect_mark_relevant (VEC(tree,heap) **worklist, tree stmt,
|
||||
|
||||
static bool
|
||||
vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
|
||||
bool *relevant_p, bool *live_p)
|
||||
enum vect_relevant *relevant, bool *live_p)
|
||||
{
|
||||
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
|
||||
ssa_op_iter op_iter;
|
||||
@ -1492,12 +1492,12 @@ vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
|
||||
use_operand_p use_p;
|
||||
def_operand_p def_p;
|
||||
|
||||
*relevant_p = false;
|
||||
*relevant = vect_unused_in_loop;
|
||||
*live_p = false;
|
||||
|
||||
/* cond stmt other than loop exit cond. */
|
||||
if (is_ctrl_stmt (stmt) && (stmt != LOOP_VINFO_EXIT_COND (loop_vinfo)))
|
||||
*relevant_p = true;
|
||||
*relevant = vect_used_in_loop;
|
||||
|
||||
/* changing memory. */
|
||||
if (TREE_CODE (stmt) != PHI_NODE)
|
||||
@ -1505,7 +1505,7 @@ vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "vec_stmt_relevant_p: stmt has vdefs.");
|
||||
*relevant_p = true;
|
||||
*relevant = vect_used_in_loop;
|
||||
}
|
||||
|
||||
/* uses outside the loop. */
|
||||
@ -1529,7 +1529,7 @@ vect_stmt_relevant_p (tree stmt, loop_vec_info loop_vinfo,
|
||||
}
|
||||
}
|
||||
|
||||
return (*live_p || *relevant_p);
|
||||
return (*live_p || *relevant);
|
||||
}
|
||||
|
||||
|
||||
@ -1564,7 +1564,8 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
|
||||
stmt_vec_info stmt_vinfo;
|
||||
basic_block bb;
|
||||
tree phi;
|
||||
bool relevant_p, live_p;
|
||||
bool live_p;
|
||||
enum vect_relevant relevant;
|
||||
tree def, def_stmt;
|
||||
enum vect_def_type dt;
|
||||
|
||||
@ -1584,8 +1585,8 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
|
||||
print_generic_expr (vect_dump, phi, TDF_SLIM);
|
||||
}
|
||||
|
||||
if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant_p, &live_p))
|
||||
vect_mark_relevant (&worklist, phi, relevant_p, live_p);
|
||||
if (vect_stmt_relevant_p (phi, loop_vinfo, &relevant, &live_p))
|
||||
vect_mark_relevant (&worklist, phi, relevant, live_p);
|
||||
}
|
||||
|
||||
for (i = 0; i < nbbs; i++)
|
||||
@ -1601,8 +1602,8 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
|
||||
print_generic_expr (vect_dump, stmt, TDF_SLIM);
|
||||
}
|
||||
|
||||
if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant_p, &live_p))
|
||||
vect_mark_relevant (&worklist, stmt, relevant_p, live_p);
|
||||
if (vect_stmt_relevant_p (stmt, loop_vinfo, &relevant, &live_p))
|
||||
vect_mark_relevant (&worklist, stmt, relevant, live_p);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1619,7 +1620,7 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
|
||||
print_generic_expr (vect_dump, stmt, TDF_SLIM);
|
||||
}
|
||||
|
||||
/* Examine the USEs of STMT. For each ssa-name USE thta is defined
|
||||
/* Examine the USEs of STMT. For each ssa-name USE that is defined
|
||||
in the loop, mark the stmt that defines it (DEF_STMT) as
|
||||
relevant/irrelevant and live/dead according to the liveness and
|
||||
relevance properties of STMT.
|
||||
@ -1630,13 +1631,13 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
|
||||
ann = stmt_ann (stmt);
|
||||
stmt_vinfo = vinfo_for_stmt (stmt);
|
||||
|
||||
relevant_p = STMT_VINFO_RELEVANT_P (stmt_vinfo);
|
||||
relevant = STMT_VINFO_RELEVANT (stmt_vinfo);
|
||||
live_p = STMT_VINFO_LIVE_P (stmt_vinfo);
|
||||
|
||||
/* Generally, the liveness and relevance properties of STMT are
|
||||
propagated to the DEF_STMTs of its USEs:
|
||||
STMT_VINFO_LIVE_P (DEF_STMT_info) <-- live_p
|
||||
STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- relevant_p
|
||||
STMT_VINFO_RELEVANT (DEF_STMT_info) <-- relevant
|
||||
|
||||
Exceptions:
|
||||
|
||||
@ -1659,18 +1660,22 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
|
||||
the def_stmt of these uses we want to set liveness/relevance
|
||||
as follows:
|
||||
STMT_VINFO_LIVE_P (DEF_STMT_info) <-- false
|
||||
STMT_VINFO_RELEVANT_P (DEF_STMT_info) <-- true
|
||||
STMT_VINFO_RELEVANT (DEF_STMT_info) <-- vect_used_by_reduction
|
||||
because even though STMT is classified as live (since it defines a
|
||||
value that is used across loop iterations) and irrelevant (since it
|
||||
is not used inside the loop), it will be vectorized, and therefore
|
||||
the corresponding DEF_STMTs need to be marked as relevant.
|
||||
We distinguish between two kinds of relevant stmts - those that are
|
||||
used by a reduction computation, and those that are (also) used by a regular computation. This allows us later on to identify stmts
|
||||
that are used solely by a reduction, and therefore the order of
|
||||
the results that they produce does not have to be kept.
|
||||
*/
|
||||
|
||||
/* case 2.2: */
|
||||
if (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def)
|
||||
{
|
||||
gcc_assert (!relevant_p && live_p);
|
||||
relevant_p = true;
|
||||
gcc_assert (relevant == vect_unused_in_loop && live_p);
|
||||
relevant = vect_used_by_reduction;
|
||||
live_p = false;
|
||||
}
|
||||
|
||||
@ -1710,7 +1715,7 @@ vect_mark_stmts_to_be_vectorized (loop_vec_info loop_vinfo)
|
||||
&& TREE_CODE (def_stmt) == PHI_NODE)
|
||||
continue;
|
||||
|
||||
vect_mark_relevant (&worklist, def_stmt, relevant_p, live_p);
|
||||
vect_mark_relevant (&worklist, def_stmt, relevant, live_p);
|
||||
}
|
||||
} /* while worklist */
|
||||
|
||||
@ -1738,7 +1743,7 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
|
||||
/* Analyze phi functions of the loop header. */
|
||||
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "=== vect_can_advance_ivs_p ===");
|
||||
fprintf (vect_dump, "vect_can_advance_ivs_p:");
|
||||
|
||||
for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
|
||||
{
|
||||
|
@ -411,9 +411,15 @@ expand_vector_operations_1 (block_stmt_iterator *bsi)
|
||||
gcc_assert (code != CONVERT_EXPR);
|
||||
op = optab_for_tree_code (code, type);
|
||||
|
||||
/* For widening vector operations, the relevant type is of the arguments,
|
||||
not the widened result. */
|
||||
if (code == WIDEN_SUM_EXPR)
|
||||
/* For widening/narrowing vector operations, the relevant type is of the
|
||||
arguments, not the widened result. */
|
||||
if (code == WIDEN_SUM_EXPR
|
||||
|| code == VEC_WIDEN_MULT_HI_EXPR
|
||||
|| code == VEC_WIDEN_MULT_LO_EXPR
|
||||
|| code == VEC_UNPACK_HI_EXPR
|
||||
|| code == VEC_UNPACK_LO_EXPR
|
||||
|| code == VEC_PACK_MOD_EXPR
|
||||
|| code == VEC_PACK_SAT_EXPR)
|
||||
type = TREE_TYPE (TREE_OPERAND (rhs, 0));
|
||||
|
||||
/* Optabs will try converting a negation into a subtraction, so
|
||||
|
@ -334,12 +334,69 @@ vect_recog_dot_prod_pattern (tree last_stmt, tree *type_in, tree *type_out)
|
||||
*/
|
||||
|
||||
static tree
|
||||
vect_recog_widen_mult_pattern (tree last_stmt ATTRIBUTE_UNUSED,
|
||||
tree *type_in ATTRIBUTE_UNUSED,
|
||||
tree *type_out ATTRIBUTE_UNUSED)
|
||||
vect_recog_widen_mult_pattern (tree last_stmt,
|
||||
tree *type_in,
|
||||
tree *type_out)
|
||||
{
|
||||
/* Yet to be implemented. */
|
||||
return NULL;
|
||||
tree expr;
|
||||
tree def_stmt0, def_stmt1;
|
||||
tree oprnd0, oprnd1;
|
||||
tree type, half_type0, half_type1;
|
||||
tree pattern_expr;
|
||||
tree vectype;
|
||||
tree dummy;
|
||||
enum tree_code dummy_code;
|
||||
|
||||
if (TREE_CODE (last_stmt) != MODIFY_EXPR)
|
||||
return NULL;
|
||||
|
||||
expr = TREE_OPERAND (last_stmt, 1);
|
||||
type = TREE_TYPE (expr);
|
||||
|
||||
/* Starting from LAST_STMT, follow the defs of its uses in search
|
||||
of the above pattern. */
|
||||
|
||||
if (TREE_CODE (expr) != MULT_EXPR)
|
||||
return NULL;
|
||||
|
||||
oprnd0 = TREE_OPERAND (expr, 0);
|
||||
oprnd1 = TREE_OPERAND (expr, 1);
|
||||
if (TYPE_MAIN_VARIANT (TREE_TYPE (oprnd0)) != TYPE_MAIN_VARIANT (type)
|
||||
|| TYPE_MAIN_VARIANT (TREE_TYPE (oprnd1)) != TYPE_MAIN_VARIANT (type))
|
||||
return NULL;
|
||||
|
||||
/* Check argument 0 */
|
||||
if (!widened_name_p (oprnd0, last_stmt, &half_type0, &def_stmt0))
|
||||
return NULL;
|
||||
oprnd0 = TREE_OPERAND (TREE_OPERAND (def_stmt0, 1), 0);
|
||||
|
||||
/* Check argument 1 */
|
||||
if (!widened_name_p (oprnd1, last_stmt, &half_type1, &def_stmt1))
|
||||
return NULL;
|
||||
oprnd1 = TREE_OPERAND (TREE_OPERAND (def_stmt1, 1), 0);
|
||||
|
||||
if (TYPE_MAIN_VARIANT (half_type0) != TYPE_MAIN_VARIANT (half_type1))
|
||||
return NULL;
|
||||
|
||||
/* Pattern detected. */
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "vect_recog_widen_mult_pattern: detected: ");
|
||||
|
||||
/* Check target support */
|
||||
vectype = get_vectype_for_scalar_type (half_type0);
|
||||
if (!supportable_widening_operation (WIDEN_MULT_EXPR, last_stmt, vectype,
|
||||
&dummy, &dummy, &dummy_code,
|
||||
&dummy_code))
|
||||
return NULL;
|
||||
|
||||
*type_in = vectype;
|
||||
*type_out = NULL_TREE;
|
||||
|
||||
/* Pattern supported. Create a stmt to be used to replace the pattern: */
|
||||
pattern_expr = build2 (WIDEN_MULT_EXPR, type, oprnd0, oprnd1);
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
print_generic_expr (vect_dump, pattern_expr, TDF_SLIM);
|
||||
return pattern_expr;
|
||||
}
|
||||
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -136,6 +136,7 @@ Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
#include "cfgloop.h"
|
||||
#include "cfglayout.h"
|
||||
#include "expr.h"
|
||||
#include "recog.h"
|
||||
#include "optabs.h"
|
||||
#include "params.h"
|
||||
#include "toplev.h"
|
||||
@ -1359,8 +1360,8 @@ new_stmt_vec_info (tree stmt, loop_vec_info loop_vinfo)
|
||||
STMT_VINFO_TYPE (res) = undef_vec_info_type;
|
||||
STMT_VINFO_STMT (res) = stmt;
|
||||
STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
|
||||
STMT_VINFO_RELEVANT_P (res) = 0;
|
||||
STMT_VINFO_LIVE_P (res) = 0;
|
||||
STMT_VINFO_RELEVANT (res) = 0;
|
||||
STMT_VINFO_LIVE_P (res) = false;
|
||||
STMT_VINFO_VECTYPE (res) = NULL;
|
||||
STMT_VINFO_VEC_STMT (res) = NULL;
|
||||
STMT_VINFO_IN_PATTERN_P (res) = false;
|
||||
@ -1753,6 +1754,127 @@ vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, tree *def_stmt,
|
||||
}
|
||||
|
||||
|
||||
/* Function supportable_widening_operation
|
||||
|
||||
Check whether an operation represented by the code CODE is a
|
||||
widening operation that is supported by the target platform in
|
||||
vector form (i.e., when operating on arguments of type VECTYPE).
|
||||
|
||||
The two kinds of widening operations we currently support are
|
||||
NOP and WIDEN_MULT. This function checks if these operations
|
||||
are supported by the target platform either directly (via vector
|
||||
tree-codes), or via target builtins.
|
||||
|
||||
Output:
|
||||
- CODE1 and CODE2 are codes of vector operations to be used when
|
||||
vectorizing the operation, if available.
|
||||
- DECL1 and DECL2 are decls of target builtin functions to be used
|
||||
when vectorizing the operation, if available. In this case,
|
||||
CODE1 and CODE2 are CALL_EXPR. */
|
||||
|
||||
bool
|
||||
supportable_widening_operation (enum tree_code code, tree stmt, tree vectype,
|
||||
tree *decl1, tree *decl2,
|
||||
enum tree_code *code1, enum tree_code *code2)
|
||||
{
|
||||
stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
|
||||
bool ordered_p;
|
||||
enum machine_mode vec_mode;
|
||||
enum insn_code icode1, icode2;
|
||||
optab optab1, optab2;
|
||||
tree expr = TREE_OPERAND (stmt, 1);
|
||||
tree type = TREE_TYPE (expr);
|
||||
tree wide_vectype = get_vectype_for_scalar_type (type);
|
||||
enum tree_code c1, c2;
|
||||
|
||||
/* The result of a vectorized widening operation usually requires two vectors
|
||||
(because the widened results do not fit in one vector). The generated
|
||||
vector results would normally be expected to be generated in the same
|
||||
order as in the original scalar computation. i.e. if 8 results are
|
||||
generated in each vector iteration, they are to be organized as follows:
|
||||
vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].
|
||||
|
||||
However, in the special case that the result of the widening operation is
|
||||
used in a reduction computation only, the order doesn't matter (because
|
||||
when vectorizing a reduction we change the order of the computation).
|
||||
Some targets can take advantage of this and generate more efficient code.
|
||||
For example, targets like Altivec, that support widen_mult using a sequence
|
||||
of {mult_even,mult_odd} generate the following vectors:
|
||||
vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8]. */
|
||||
|
||||
if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction)
|
||||
ordered_p = false;
|
||||
else
|
||||
ordered_p = true;
|
||||
|
||||
if (!ordered_p
|
||||
&& code == WIDEN_MULT_EXPR
|
||||
&& targetm.vectorize.builtin_mul_widen_even
|
||||
&& targetm.vectorize.builtin_mul_widen_even (vectype)
|
||||
&& targetm.vectorize.builtin_mul_widen_odd
|
||||
&& targetm.vectorize.builtin_mul_widen_odd (vectype))
|
||||
{
|
||||
if (vect_print_dump_info (REPORT_DETAILS))
|
||||
fprintf (vect_dump, "Unordered widening operation detected.");
|
||||
|
||||
*code1 = *code2 = CALL_EXPR;
|
||||
*decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
|
||||
*decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
|
||||
return true;
|
||||
}
|
||||
|
||||
switch (code)
|
||||
{
|
||||
case WIDEN_MULT_EXPR:
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
{
|
||||
c1 = VEC_WIDEN_MULT_HI_EXPR;
|
||||
c2 = VEC_WIDEN_MULT_LO_EXPR;
|
||||
}
|
||||
else
|
||||
{
|
||||
c2 = VEC_WIDEN_MULT_HI_EXPR;
|
||||
c1 = VEC_WIDEN_MULT_LO_EXPR;
|
||||
}
|
||||
break;
|
||||
|
||||
case NOP_EXPR:
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
{
|
||||
c1 = VEC_UNPACK_HI_EXPR;
|
||||
c2 = VEC_UNPACK_LO_EXPR;
|
||||
}
|
||||
else
|
||||
{
|
||||
c2 = VEC_UNPACK_HI_EXPR;
|
||||
c1 = VEC_UNPACK_LO_EXPR;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
*code1 = c1;
|
||||
*code2 = c2;
|
||||
optab1 = optab_for_tree_code (c1, vectype);
|
||||
optab2 = optab_for_tree_code (c2, vectype);
|
||||
|
||||
if (!optab1 || !optab2)
|
||||
return false;
|
||||
|
||||
vec_mode = TYPE_MODE (vectype);
|
||||
if ((icode1 = optab1->handlers[(int) vec_mode].insn_code) == CODE_FOR_nothing
|
||||
|| insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
|
||||
|| (icode2 = optab2->handlers[(int) vec_mode].insn_code)
|
||||
== CODE_FOR_nothing
|
||||
|| insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/* Function reduction_code_for_scalar_code
|
||||
|
||||
Input:
|
||||
|
@ -165,7 +165,16 @@ enum stmt_vec_info_type {
|
||||
op_vec_info_type,
|
||||
assignment_vec_info_type,
|
||||
condition_vec_info_type,
|
||||
reduc_vec_info_type
|
||||
reduc_vec_info_type,
|
||||
type_promotion_vec_info_type,
|
||||
type_demotion_vec_info_type
|
||||
};
|
||||
|
||||
/* Indicates whether/how a variable is used in the loop. */
|
||||
enum vect_relevant {
|
||||
vect_unused_in_loop = 0,
|
||||
vect_used_by_reduction,
|
||||
vect_used_in_loop
|
||||
};
|
||||
|
||||
typedef struct data_reference *dr_p;
|
||||
@ -182,10 +191,10 @@ typedef struct _stmt_vec_info {
|
||||
/* The loop_vec_info with respect to which STMT is vectorized. */
|
||||
loop_vec_info loop_vinfo;
|
||||
|
||||
/* Not all stmts in the loop need to be vectorized. e.g, the incrementation
|
||||
/* Not all stmts in the loop need to be vectorized. e.g, the increment
|
||||
of the loop induction variable and computation of array indexes. relevant
|
||||
indicates whether the stmt needs to be vectorized. */
|
||||
bool relevant;
|
||||
enum vect_relevant relevant;
|
||||
|
||||
/* Indicates whether this stmt is part of a computation whose result is
|
||||
used outside the loop. */
|
||||
@ -232,7 +241,7 @@ typedef struct _stmt_vec_info {
|
||||
#define STMT_VINFO_TYPE(S) (S)->type
|
||||
#define STMT_VINFO_STMT(S) (S)->stmt
|
||||
#define STMT_VINFO_LOOP_VINFO(S) (S)->loop_vinfo
|
||||
#define STMT_VINFO_RELEVANT_P(S) (S)->relevant
|
||||
#define STMT_VINFO_RELEVANT(S) (S)->relevant
|
||||
#define STMT_VINFO_LIVE_P(S) (S)->live
|
||||
#define STMT_VINFO_VECTYPE(S) (S)->vectype
|
||||
#define STMT_VINFO_VEC_STMT(S) (S)->vectorized_stmt
|
||||
@ -242,6 +251,8 @@ typedef struct _stmt_vec_info {
|
||||
#define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs
|
||||
#define STMT_VINFO_DEF_TYPE(S) (S)->def_type
|
||||
|
||||
#define STMT_VINFO_RELEVANT_P(S) ((S)->relevant != vect_unused_in_loop)
|
||||
|
||||
static inline void set_stmt_info (stmt_ann_t ann, stmt_vec_info stmt_info);
|
||||
static inline stmt_vec_info vinfo_for_stmt (tree stmt);
|
||||
|
||||
@ -328,6 +339,8 @@ extern bool vect_can_force_dr_alignment_p (tree, unsigned int);
|
||||
extern enum dr_alignment_support vect_supportable_dr_alignment
|
||||
(struct data_reference *);
|
||||
extern bool reduction_code_for_scalar_code (enum tree_code, enum tree_code *);
|
||||
extern bool supportable_widening_operation (enum tree_code, tree, tree,
|
||||
tree *, tree *, enum tree_code *, enum tree_code *);
|
||||
/* Creation and deletion of loop and stmt info structs. */
|
||||
extern loop_vec_info new_loop_vec_info (struct loop *loop);
|
||||
extern void destroy_loop_vec_info (loop_vec_info);
|
||||
@ -354,6 +367,8 @@ void vect_pattern_recog (loop_vec_info);
|
||||
extern bool vectorizable_load (tree, block_stmt_iterator *, tree *);
|
||||
extern bool vectorizable_store (tree, block_stmt_iterator *, tree *);
|
||||
extern bool vectorizable_operation (tree, block_stmt_iterator *, tree *);
|
||||
extern bool vectorizable_type_promotion (tree, block_stmt_iterator *, tree *);
|
||||
extern bool vectorizable_type_demotion (tree, block_stmt_iterator *, tree *);
|
||||
extern bool vectorizable_assignment (tree, block_stmt_iterator *, tree *);
|
||||
extern bool vectorizable_condition (tree, block_stmt_iterator *, tree *);
|
||||
extern bool vectorizable_live_operation (tree, block_stmt_iterator *, tree *);
|
||||
|
22
gcc/tree.def
22
gcc/tree.def
@ -1073,6 +1073,28 @@ DEFTREECODE (WIDEN_MULT_EXPR, "widen_mult_expr", tcc_binary, 2)
|
||||
DEFTREECODE (VEC_LSHIFT_EXPR, "vec_lshift_expr", tcc_binary, 2)
|
||||
DEFTREECODE (VEC_RSHIFT_EXPR, "vec_rshift_expr", tcc_binary, 2)
|
||||
|
||||
/* Widening vector multiplication.
|
||||
The two operands are vectors with N elements of size S. Multiplying the
|
||||
elements of the two vectors will result in N products of size 2*S.
|
||||
VEC_WIDEN_MULT_HI_EXPR computes the N/2 high products.
|
||||
VEC_WIDEN_MULT_LO_EXPR computes the N/2 low products. */
|
||||
DEFTREECODE (VEC_WIDEN_MULT_HI_EXPR, "widen_mult_hi_expr", tcc_binary, 2)
|
||||
DEFTREECODE (VEC_WIDEN_MULT_LO_EXPR, "widen_mult_lo_expr", tcc_binary, 2)
|
||||
|
||||
/* Unpack (extract and promote/widen) the high/low elements of the input vector
|
||||
into the output vector. The input vector has twice as many elements
|
||||
as the output vector, that are half the size of the elements
|
||||
of the output vector. This is used to support type promotion. */
|
||||
DEFTREECODE (VEC_UNPACK_HI_EXPR, "vec_unpack_hi_expr", tcc_unary, 1)
|
||||
DEFTREECODE (VEC_UNPACK_LO_EXPR, "vec_unpack_lo_expr", tcc_unary, 1)
|
||||
|
||||
/* Pack (demote/narrow and merge) the elements of the two input vectors
|
||||
into the output vector, using modulo/saturating arithmetic.
|
||||
The elements of the input vectors are twice the size of the elements of the
|
||||
output vector. This is used to support type demotion. */
|
||||
DEFTREECODE (VEC_PACK_MOD_EXPR, "vec_pack_mod_expr", tcc_binary, 2)
|
||||
DEFTREECODE (VEC_PACK_SAT_EXPR, "vec_pack_sat_expr", tcc_binary, 2)
|
||||
|
||||
/*
|
||||
Local variables:
|
||||
mode:c
|
||||
|
Loading…
Reference in New Issue
Block a user