mirror of
https://gcc.gnu.org/git/gcc.git
i386.c (x86_use_leave, [...]): Merge into ...
* config/i386/i386.c (x86_use_leave, x86_push_memory, x86_zero_extend_with_and, x86_movx, x86_double_with_add, x86_use_bit_test, x86_unroll_strlen, x86_deep_branch, x86_branch_hints, x86_use_sahf, x86_partial_reg_stall, x86_partial_flag_reg_stall, x86_use_himode_fiop, x86_use_simode_fiop, x86_use_mov0, x86_use_cltd, x86_read_modify_write, x86_read_modify, x86_split_long_moves, x86_promote_QImode, x86_fast_prefix, x86_single_stringop, x86_qimode_math, x86_promote_qi_regs, x86_himode_math, x86_promote_hi_regs, x86_sub_esp_4, x86_sub_esp_8, x86_add_esp_4, x86_add_esp_8, x86_integer_DFmode_moves, x86_partial_reg_dependency, x86_memory_mismatch_stall, x86_prologue_using_move, x86_epilogue_using_move, x86_shift1, x86_sse_partial_reg_dependency, x86_sse_split_regs, x86_sse_unaligned_move_optimal, x86_sse_typeless_stores, x86_sse_load0_by_pxor, x86_use_ffreep, x86_use_incdec, x86_inter_unit_moves, x86_ext_80387_constants, x86_four_jump_limit, x86_schedule, x86_use_bt, x86_pad_returns): Merge into ... (ix86_tune_features): ... here. New array. (x86_cmove, x86_use_xchgb, x86_cmpxchg, x86_cmpxchg8b, x86_xadd, x86_bswap): Merge into ... (ix86_arch_features): ... here. New array. (x86_3dnow_a): Remove. (x86_accumulate_outgoing_args): Make static. (x86_arch_always_fancy_math_387): Make static. (ix86_tune_mask, ix86_arch_mask): Move ... (override_options): ... to local variables here. Apply the appropriate mask to each element of ix86_arch_features and ix86_tune_features. Adjust TARGET_CMOVE and TARGET_USE_SAHF as were done in the old macros. (standard_80387_constant_p): Use TARGET_EXT_80387_CONSTANTS. * config/i386/i386.h (x86_use_leave, x86_push_memory, x86_zero_extend_with_and, x86_use_bit_test, x86_cmove, x86_deep_branch, x86_branch_hints, x86_unroll_strlen, x86_double_with_add, x86_partial_reg_stall, x86_movx, x86_use_himode_fiop, x86_use_simode_fiop, x86_use_mov0, x86_use_cltd, x86_use_xchgb, x86_read_modify_write, x86_read_modify, x86_split_long_moves, x86_promote_QImode, x86_single_stringop, x86_fast_prefix, x86_himode_math, x86_qimode_math, x86_promote_qi_regs, x86_promote_hi_regs, x86_integer_DFmode_moves, x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8, x86_partial_reg_dependency, x86_memory_mismatch_stall, x86_accumulate_outgoing_args, x86_prologue_using_move, x86_epilogue_using_move, x86_decompose_lea, x86_arch_always_fancy_math_387, x86_shift1, x86_sse_partial_reg_dependency, x86_sse_split_regs, x86_sse_unaligned_move_optimal, x86_sse_typeless_stores, x86_sse_load0_by_pxor, x86_use_ffreep, x86_inter_unit_moves, x86_schedule, x86_use_bt, x86_cmpxchg, x86_cmpxchg8b, x86_xadd, x86_use_incdec, x86_pad_returns, x86_bswap, x86_partial_flag_reg_stall): Remove. (enum ix86_tune_indices): New. (ix86_tune_features): New. 
(TARGET_USE_LEAVE, TARGET_PUSH_MEMORY, TARGET_ZERO_EXTEND_WITH_AND, TARGET_USE_BIT_TEST, TARGET_UNROLL_STRLEN, TARGET_DEEP_BRANCH_PREDICTION, TARGET_BRANCH_PREDICTION_HINTS, TARGET_DOUBLE_WITH_ADD, TARGET_USE_SAHF, TARGET_MOVX, TARGET_PARTIAL_REG_STALL, TARGET_PARTIAL_FLAG_REG_STALL, TARGET_USE_HIMODE_FIOP, TARGET_USE_SIMODE_FIOP, TARGET_USE_MOV0, TARGET_USE_CLTD, TARGET_USE_XCHGB, TARGET_SPLIT_LONG_MOVES, TARGET_READ_MODIFY_WRITE, TARGET_READ_MODIFY, TARGET_PROMOTE_QImode, TARGET_FAST_PREFIX, TARGET_SINGLE_STRINGOP, TARGET_QIMODE_MATH, TARGET_HIMODE_MATH, TARGET_PROMOTE_QI_REGS, TARGET_PROMOTE_HI_REGS, TARGET_ADD_ESP_4, TARGET_ADD_ESP_8, TARGET_SUB_ESP_4, TARGET_SUB_ESP_8, TARGET_INTEGER_DFMODE_MOVES, TARGET_PARTIAL_REG_DEPENDENCY, TARGET_SSE_PARTIAL_REG_DEPENDENCY, TARGET_SSE_UNALIGNED_MOVE_OPTIMAL, TARGET_SSE_SPLIT_REGS, TARGET_SSE_TYPELESS_STORES, TARGET_SSE_LOAD0_BY_PXOR, TARGET_MEMORY_MISMATCH_STALL, TARGET_PROLOGUE_USING_MOVE, TARGET_EPILOGUE_USING_MOVE, TARGET_SHIFT1, TARGET_USE_FFREEP, TARGET_INTER_UNIT_MOVES, TARGET_FOUR_JUMP_LIMIT, TARGET_SCHEDULE, TARGET_USE_BT, TARGET_USE_INCDEC, TARGET_PAD_RETURNS, TARGET_EXT_80387_CONSTANTS): Use it. (enum ix86_arch_indices): New. (ix86_arch_features): New. (TARGET_CMOVE, TARGET_CMPXCHG, TARGET_CMPXCHG8B, TARGET_XADD, TARGET_BSWAP): Use it. (ix86_tune_mask, ix86_arch_mask): Remove. From-SVN: r122621
This commit is contained in:
parent
14da607343
commit
80fd744fda
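The diffs below are long but mechanical. The essence of the change: each per-feature tuning flag used to be a standalone bitmask variable that every TARGET_* macro ANDed with ix86_tune_mask (or ix86_arch_mask); after this patch the flags live in two arrays indexed by new enums, the mask is applied once in override_options, and the macros become plain array reads. A minimal before/after sketch of that access pattern, using names taken from the patch (declarations abbreviated):

/* Before: one global mask per feature, combined with the tuning mask
   at every use site.  */
extern const int x86_use_leave;
extern int ix86_tune_mask;                     /* 1 << ix86_tune */
#define TARGET_USE_LEAVE (x86_use_leave & ix86_tune_mask)

/* After: one array element per feature; override_options () ANDs each
   element with (1u << ix86_tune) once, so every later test is a load.  */
extern unsigned int ix86_tune_features[X86_TUNE_LAST];
#define TARGET_USE_LEAVE ix86_tune_features[X86_TUNE_USE_LEAVE]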
gcc/ChangeLog
@@ -1,3 +1,83 @@
2007-03-06  Richard Henderson  <rth@redhat.com>

        * config/i386/i386.c (x86_use_leave, x86_push_memory,
        x86_zero_extend_with_and, x86_movx, x86_double_with_add,
        x86_use_bit_test, x86_unroll_strlen, x86_deep_branch,
        x86_branch_hints, x86_use_sahf, x86_partial_reg_stall,
        x86_partial_flag_reg_stall, x86_use_himode_fiop, x86_use_simode_fiop,
        x86_use_mov0, x86_use_cltd, x86_read_modify_write, x86_read_modify,
        x86_split_long_moves, x86_promote_QImode, x86_fast_prefix,
        x86_single_stringop, x86_qimode_math, x86_promote_qi_regs,
        x86_himode_math, x86_promote_hi_regs, x86_sub_esp_4, x86_sub_esp_8,
        x86_add_esp_4, x86_add_esp_8, x86_integer_DFmode_moves,
        x86_partial_reg_dependency, x86_memory_mismatch_stall,
        x86_prologue_using_move, x86_epilogue_using_move, x86_shift1,
        x86_sse_partial_reg_dependency, x86_sse_split_regs,
        x86_sse_unaligned_move_optimal, x86_sse_typeless_stores,
        x86_sse_load0_by_pxor, x86_use_ffreep, x86_use_incdec,
        x86_inter_unit_moves, x86_ext_80387_constants, x86_four_jump_limit,
        x86_schedule, x86_use_bt, x86_pad_returns): Merge into ...
        (ix86_tune_features): ... here. New array.
        (x86_cmove, x86_use_xchgb, x86_cmpxchg, x86_cmpxchg8b,
        x86_xadd, x86_bswap): Merge into ...
        (ix86_arch_features): ... here. New array.
        (x86_3dnow_a): Remove.
        (x86_accumulate_outgoing_args): Make static.
        (x86_arch_always_fancy_math_387): Make static.
        (ix86_tune_mask, ix86_arch_mask): Move ...
        (override_options): ... to local variables here. Apply the
        appropriate mask to each element of ix86_arch_features and
        ix86_tune_features. Adjust TARGET_CMOVE and TARGET_USE_SAHF
        as were done in the old macros.
        (standard_80387_constant_p): Use TARGET_EXT_80387_CONSTANTS.
        * config/i386/i386.h (x86_use_leave, x86_push_memory,
        x86_zero_extend_with_and, x86_use_bit_test, x86_cmove, x86_deep_branch,
        x86_branch_hints, x86_unroll_strlen, x86_double_with_add,
        x86_partial_reg_stall, x86_movx, x86_use_himode_fiop,
        x86_use_simode_fiop, x86_use_mov0, x86_use_cltd, x86_use_xchgb,
        x86_read_modify_write, x86_read_modify, x86_split_long_moves,
        x86_promote_QImode, x86_single_stringop, x86_fast_prefix,
        x86_himode_math, x86_qimode_math, x86_promote_qi_regs,
        x86_promote_hi_regs, x86_integer_DFmode_moves, x86_add_esp_4,
        x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8,
        x86_partial_reg_dependency, x86_memory_mismatch_stall,
        x86_accumulate_outgoing_args, x86_prologue_using_move,
        x86_epilogue_using_move, x86_decompose_lea,
        x86_arch_always_fancy_math_387, x86_shift1,
        x86_sse_partial_reg_dependency, x86_sse_split_regs,
        x86_sse_unaligned_move_optimal, x86_sse_typeless_stores,
        x86_sse_load0_by_pxor, x86_use_ffreep, x86_inter_unit_moves,
        x86_schedule, x86_use_bt, x86_cmpxchg, x86_cmpxchg8b, x86_xadd,
        x86_use_incdec, x86_pad_returns, x86_bswap,
        x86_partial_flag_reg_stall): Remove.
        (enum ix86_tune_indices): New.
        (ix86_tune_features): New.
        (TARGET_USE_LEAVE, TARGET_PUSH_MEMORY, TARGET_ZERO_EXTEND_WITH_AND,
        TARGET_USE_BIT_TEST, TARGET_UNROLL_STRLEN,
        TARGET_DEEP_BRANCH_PREDICTION, TARGET_BRANCH_PREDICTION_HINTS,
        TARGET_DOUBLE_WITH_ADD, TARGET_USE_SAHF, TARGET_MOVX,
        TARGET_PARTIAL_REG_STALL, TARGET_PARTIAL_FLAG_REG_STALL,
        TARGET_USE_HIMODE_FIOP, TARGET_USE_SIMODE_FIOP, TARGET_USE_MOV0,
        TARGET_USE_CLTD, TARGET_USE_XCHGB, TARGET_SPLIT_LONG_MOVES,
        TARGET_READ_MODIFY_WRITE, TARGET_READ_MODIFY, TARGET_PROMOTE_QImode,
        TARGET_FAST_PREFIX, TARGET_SINGLE_STRINGOP, TARGET_QIMODE_MATH,
        TARGET_HIMODE_MATH, TARGET_PROMOTE_QI_REGS, TARGET_PROMOTE_HI_REGS,
        TARGET_ADD_ESP_4, TARGET_ADD_ESP_8, TARGET_SUB_ESP_4,
        TARGET_SUB_ESP_8, TARGET_INTEGER_DFMODE_MOVES,
        TARGET_PARTIAL_REG_DEPENDENCY, TARGET_SSE_PARTIAL_REG_DEPENDENCY,
        TARGET_SSE_UNALIGNED_MOVE_OPTIMAL, TARGET_SSE_SPLIT_REGS,
        TARGET_SSE_TYPELESS_STORES, TARGET_SSE_LOAD0_BY_PXOR,
        TARGET_MEMORY_MISMATCH_STALL, TARGET_PROLOGUE_USING_MOVE,
        TARGET_EPILOGUE_USING_MOVE, TARGET_SHIFT1, TARGET_USE_FFREEP,
        TARGET_INTER_UNIT_MOVES, TARGET_FOUR_JUMP_LIMIT, TARGET_SCHEDULE,
        TARGET_USE_BT, TARGET_USE_INCDEC, TARGET_PAD_RETURNS,
        TARGET_EXT_80387_CONSTANTS): Use it.
        (enum ix86_arch_indices): New.
        (ix86_arch_features): New.
        (TARGET_CMOVE, TARGET_CMPXCHG, TARGET_CMPXCHG8B, TARGET_XADD,
        TARGET_BSWAP): Use it.
        (ix86_tune_mask, ix86_arch_mask): Remove.

2007-03-06  Joseph Myers  <joseph@codesourcery.com>

        PR bootstrap/31020
gcc/config/i386/i386.c
@@ -1004,187 +1004,221 @@ const struct processor_costs *ix86_cost = &pentium_cost;
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)

/* Leave is not affecting Nocona SPEC2000 results negatively, so enabling for
   Generic64 seems like good code size tradeoff. We can't enable it for 32bit
   generic because it is not working well with PPro base chips. */
const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2
  | m_GENERIC64;
const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
/* Feature tests against the various tunings. */
unsigned int ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling for Generic64 seems like good code size
     tradeoff. We can't enable it for 32bit generic because it does not
     work well with PPro base chips. */
  m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */
  m_486 | m_PENT,

  /* X86_TUNE_USE_BIT_TEST */
  m_386,

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,

  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
  m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation result. But after P4 was made, no performance benefit
     was observed with branch hints. It also increases the code size.
     As a result, icc never generates branch hints. */
  0,

  /* X86_TUNE_DOUBLE_WITH_ADD */
  ~m_386,

  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32,
  /* | m_GENERIC | m_ATHLON_K8 ? */

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on Generic32 compilation setting as well. However
     in current implementation the partial register stalls are not eliminated
     very well - they can be introduced via subregs synthesized by combine
     and can happen in caller/callee saving sequences. Because this option
     pays back little on PPro based chips and is in conflict with partial reg
     dependencies used by Athlon/P4 based chips, it is better to leave it off
     for generic32 for now. */
  m_PPRO,

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */
  m_K6,

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
  m_PENT4,

  /* X86_TUNE_SPLIT_LONG_MOVES */
  m_PPRO,

  /* X86_TUNE_READ_MODIFY_WRITE */
  ~m_PENT,

  /* X86_TUNE_READ_MODIFY */
  ~(m_PENT | m_PPRO),

  /* X86_TUNE_PROMOTE_QIMODE */
  m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
  | m_GENERIC /* | m_PENT4 ? */,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_PENT | m_486 | m_386),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_PENT4 | m_NOCONA,

  /* X86_TUNE_QIMODE_MATH */
  ~0,

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls was more effective. */
  ~m_PPRO,

  /* X86_TUNE_PROMOTE_QI_REGS */
  0,

  /* X86_TUNE_PROMOTE_HI_REGS */
  m_PPRO,

  /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
  m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ADD_ESP_8 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
  | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_4 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_8 */
  m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies */
  ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
    | m_GENERIC | m_GEODE),

  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here in between PPro/Pentium4 based chips that thread 128bit
     SSE registers as single units versus K8 based chips that divide SSE
     registers to two 64bit halves. This knob promotes all store destinations
     to be 128bit to allow register renaming on 128bit SSE units, but usually
     results in one extra microop on 64bit SSE units. Experimental results
     shows that disabling this option on P4 brings over 20% SPECfp regression,
     while enabling it on K8 brings roughly 2.4% regression that can be partly
     masked by careful scheduling of moves. */
  m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,

  /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
  m_AMDFAM10,

  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just lower part of scalar values in proper format leaving the
     upper part undefined. */
  m_ATHLON_K8,

  /* X86_TUNE_SSE_TYPELESS_STORES */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_PENT4 | m_NOCONA,

  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SHIFT1 */
  ~m_486,

  /* X86_TUNE_USE_FFREEP */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_INTER_UNIT_MOVES */
  ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),

  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in the 16 byte window. */
  m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SCHEDULE */
  m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_BT */
  m_ATHLON_K8_AMDFAM10,

  /* X86_TUNE_USE_INCDEC */
  ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC),

  /* X86_TUNE_PAD_RETURNS */
  m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC
};

/* Feature tests against the various architecture variations. */
unsigned int ix86_arch_features[X86_ARCH_LAST] = {
  /* X86_ARCH_CMOVE */
  m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA,

  /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
  ~m_386,

  /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
  ~(m_386 | m_486),

  /* X86_ARCH_XADD: Exchange and add was added for 80486. */
  ~m_386,

  /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
  ~m_386,
};

static const unsigned int x86_accumulate_outgoing_args
  = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;

static const unsigned int x86_arch_always_fancy_math_387
  = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
    | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_zero_extend_with_and = m_486 | m_PENT;
/* Enable to zero extend integer registers to avoid partial dependencies */
const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
const int x86_double_with_add = ~m_386;
const int x86_use_bit_test = m_386;
const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10
  | m_K6 | m_CORE2 | m_GENERIC;
const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA;
const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10;
const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
/* Branch hints were put in P4 based on simulation result. But
   after P4 was made, no performance benefit was observed with
   branch hints. It also increases the code size. As the result,
   icc never generates branch hints. */
const int x86_branch_hints = 0;
const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32;
/*m_GENERIC | m_ATHLON_K8 ? */
/* We probably ought to watch for partial register stalls on Generic32
   compilation setting as well. However in current implementation the
   partial register stalls are not eliminated very well - they can
   be introduced via subregs synthesized by combine and can happen
   in caller/callee saving sequences.
   Because this option pays back little on PPro based chips and is in conflict
   with partial reg. dependencies used by Athlon/P4 based chips, it is better
   to leave it off for generic32 for now. */
const int x86_partial_reg_stall = m_PPRO;
const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT
  | m_CORE2 | m_GENERIC);
const int x86_use_mov0 = m_K6;
const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
/* Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
const int x86_use_xchgb = m_PENT4;
const int x86_read_modify_write = ~m_PENT;
const int x86_read_modify = ~(m_PENT | m_PPRO);
const int x86_split_long_moves = m_PPRO;
const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
  | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
/* m_PENT4 ? */
const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
const int x86_qimode_math = ~(0);
const int x86_promote_qi_regs = 0;
/* On PPro this flag is meant to avoid partial register stalls. Just like
   the x86_partial_reg_stall this option might be considered for Generic32
   if our scheme for avoiding partial stalls was more effective. */
const int x86_himode_math = ~(m_PPRO);
const int x86_promote_hi_regs = m_PPRO;
/* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC;
const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC;
const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
  | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
/* Enable if integer moves are preferred for DFmode copies */
const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
  | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC;
const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC;
/* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
   for outgoing arguments will be computed and placed into the variable
   `current_function_outgoing_args_size'. No space will be pushed onto the stack
   for each call; instead, the function prologue should increase the stack frame
   size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
   not proper. */
const int x86_accumulate_outgoing_args = m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA | m_PPRO | m_CORE2
  | m_GENERIC;
const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
const int x86_shift1 = ~m_486;
const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO
  | m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC;
/* In Generic model we have an conflict here in between PPro/Pentium4 based chips
   that thread 128bit SSE registers as single units versus K8 based chips that
   divide SSE registers to two 64bit halves.
   x86_sse_partial_reg_dependency promote all store destinations to be 128bit
   to allow register renaming on 128bit SSE units, but usually results in one
   extra microop on 64bit SSE units. Experimental results shows that disabling
   this option on P4 brings over 20% SPECfp regression, while enabling it on
   K8 brings roughly 2.4% regression that can be partly masked by careful scheduling
   of moves. */
const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
  | m_GENERIC | m_AMDFAM10;
/* Set for machines where the type and dependencies are resolved on SSE
   register parts instead of whole registers, so we may maintain just
   lower part of scalar values in proper format leaving the upper part
   undefined. */
const int x86_sse_split_regs = m_ATHLON_K8;
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg

     if (x86_sse_partial_reg_dependency == true)
       {
         xorps reg, reg
         movlps mem, reg
         movhps mem+8, reg
       }
     else
       {
         movlps mem, reg
         movhps mem+8, reg
       }

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg

     if (x86_sse_split_regs == true)
       {
         movlpd mem, reg
         movhpd mem+8, reg
       }
     else
       {
         movsd mem, reg
         movhpd mem+8, reg
       }
*/
const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10;
const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);

const int x86_inter_unit_moves = ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC);

const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4
  | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
/* Some CPU cores are not able to predict more than 4 branch instructions in
   the 16 byte window. */
const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC;
const int x86_schedule = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT
  | m_CORE2 | m_GENERIC;
const int x86_use_bt = m_ATHLON_K8_AMDFAM10;
/* Compare and exchange was added for 80486. */
const int x86_cmpxchg = ~m_386;
/* Compare and exchange 8 bytes was added for pentium. */
const int x86_cmpxchg8b = ~(m_386 | m_486);
/* Exchange and add was added for 80486. */
const int x86_xadd = ~m_386;
/* Byteswap was added for 80486. */
const int x86_bswap = ~m_386;
const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;

static enum stringop_alg stringop_alg = no_stringop;
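Each initializer above is a bitmask over processor types. As a reading aid (not part of the patch), and assuming the m_* macros are one-bit masks of the form 1 << PROCESSOR_FOO — which the m_GENERIC definition and the 1u << ix86_tune shifts in override_options suggest — a feature word is simply the set of CPUs the knob is enabled for:

/* Illustration only; the one-bit m_* encoding and PROCESSOR_CORE2 are
   assumptions drawn from the surrounding code, not text of this patch.  */
unsigned int word = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
                    | m_CORE2 | m_GENERIC64;     /* X86_TUNE_USE_LEAVE */
unsigned int core2 = 1u << PROCESSOR_CORE2;      /* what m_CORE2 would expand to */
int enabled_for_core2 = (word & core2) != 0;     /* 1: use leave when tuning for Core 2 */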
@@ -1397,11 +1431,9 @@ enum fpmath_unit ix86_fpmath;

/* Which cpu are we scheduling for. */
enum processor_type ix86_tune;
int ix86_tune_mask;

/* Which instruction set architecture to use. */
enum processor_type ix86_arch;
int ix86_arch_mask;

/* true if sse prefetch instruction is not NOOP. */
int x86_prefetch_sse;
@@ -1811,6 +1843,7 @@ override_options (void)
{
  int i;
  int ix86_tune_defaulted = 0;
  unsigned int ix86_arch_mask, ix86_tune_mask;

  /* Comes from final.c -- no real reason to change it. */
#define MAX_CODE_ALIGN 16
@@ -2124,6 +2157,10 @@ override_options (void)
  if (i == pta_size)
    error ("bad value (%s) for -march= switch", ix86_arch_string);

  ix86_arch_mask = 1u << ix86_arch;
  for (i = 0; i < X86_ARCH_LAST; ++i)
    ix86_arch_features[i] &= ix86_arch_mask;

  for (i = 0; i < pta_size; i++)
    if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
      {
@@ -2155,8 +2192,9 @@ override_options (void)
  if (i == pta_size)
    error ("bad value (%s) for -mtune= switch", ix86_tune_string);

  ix86_arch_mask = 1 << ix86_arch;
  ix86_tune_mask = 1 << ix86_tune;
  ix86_tune_mask = 1u << ix86_tune;
  for (i = 0; i < X86_TUNE_LAST; ++i)
    ix86_tune_features[i] &= ix86_tune_mask;

  if (optimize_size)
    ix86_cost = &size_cost;
@@ -2366,7 +2404,6 @@ override_options (void)
    error ("-msseregparm used without SSE enabled");

  ix86_fpmath = TARGET_FPMATH_DEFAULT;

  if (ix86_fpmath_string != 0)
    {
      if (! strcmp (ix86_fpmath_string, "387"))
@@ -2425,6 +2462,15 @@ override_options (void)
      target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
    }

  /* For sane SSE instruction set generation we need fcomi instruction.
     It is safe to enable all CMOVE instructions. */
  if (TARGET_SSE)
    TARGET_CMOVE = 1;

  /* ??? Any idea why this is unconditionally disabled for 64-bit? */
  if (TARGET_64BIT)
    TARGET_USE_SAHF = 0;

  /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
  {
    char *p;
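Taken together, the override_options hunks above do the one-time masking that the old TARGET_* macros used to repeat at every use, plus the two special cases that previously lived in the macros themselves. A condensed restatement of what the patch adds to this function (error handling and unrelated code omitted):

/* Keep only the bit for the selected -march / -mtune CPU in each word.  */
ix86_arch_mask = 1u << ix86_arch;
for (i = 0; i < X86_ARCH_LAST; ++i)
  ix86_arch_features[i] &= ix86_arch_mask;

ix86_tune_mask = 1u << ix86_tune;
for (i = 0; i < X86_TUNE_LAST; ++i)
  ix86_tune_features[i] &= ix86_tune_mask;

/* Adjustments formerly folded into TARGET_CMOVE and TARGET_USE_SAHF:
   SSE needs fcomi, so all CMOVs are safe; SAHF stays off for 64-bit.  */
if (TARGET_SSE)
  TARGET_CMOVE = 1;
if (TARGET_64BIT)
  TARGET_USE_SAHF = 0;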
@@ -4999,7 +5045,7 @@ standard_80387_constant_p (rtx x)
  /* For XFmode constants, try to find a special 80387 instruction when
     optimizing for size or on those CPUs that benefit from them. */
  if (GET_MODE (x) == XFmode
      && (optimize_size || x86_ext_80387_constants & ix86_tune_mask))
      && (optimize_size || TARGET_EXT_80387_CONSTANTS))
    {
      int i;
@@ -9499,6 +9545,55 @@ ix86_expand_vector_move (enum machine_mode mode, rtx operands[])

/* Implement the movmisalign patterns for SSE. Non-SSE modes go
   straight to ix86_expand_vector_move. */
/* Code generation for scalar reg-reg moves of single and double precision data:
     if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
       movaps reg, reg
     else
       movss reg, reg
     if (x86_sse_partial_reg_dependency == true)
       movapd reg, reg
     else
       movsd reg, reg

   Code generation for scalar loads of double precision data:
     if (x86_sse_split_regs == true)
       movlpd mem, reg (gas syntax)
     else
       movsd mem, reg

   Code generation for unaligned packed loads of single precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
     if (x86_sse_unaligned_move_optimal)
       movups mem, reg

     if (x86_sse_partial_reg_dependency == true)
       {
         xorps reg, reg
         movlps mem, reg
         movhps mem+8, reg
       }
     else
       {
         movlps mem, reg
         movhps mem+8, reg
       }

   Code generation for unaligned packed loads of double precision data
   (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
     if (x86_sse_unaligned_move_optimal)
       movupd mem, reg

     if (x86_sse_split_regs == true)
       {
         movlpd mem, reg
         movhpd mem+8, reg
       }
     else
       {
         movsd mem, reg
         movhpd mem+8, reg
       }
*/

void
ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
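The block comment moved in the hunk above documents the move strategies as assembly pseudo-code. A hedged C-style rendering of just the unaligned single-precision load case, restated in terms of the new feature macros (the emit_* helpers are placeholders for illustration, not functions in i386.c):

if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
  emit_movups (reg, mem);            /* movups mem, reg */
else if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
  {
    emit_xorps (reg, reg);           /* clear first to break the false dependency */
    emit_movlps (reg, mem);          /* movlps mem, reg */
    emit_movhps (reg, mem_plus_8);   /* movhps mem+8, reg */
  }
else
  {
    emit_movlps (reg, mem);
    emit_movhps (reg, mem_plus_8);
  }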
gcc/config/i386/i386.h
@@ -179,111 +179,166 @@ extern const struct processor_costs *ix86_cost;
#define TARGET_GENERIC (TARGET_GENERIC32 || TARGET_GENERIC64)
#define TARGET_AMDFAM10 (ix86_tune == PROCESSOR_AMDFAM10)

extern const int x86_use_leave, x86_push_memory, x86_zero_extend_with_and;
extern const int x86_use_bit_test, x86_cmove, x86_deep_branch;
extern const int x86_branch_hints, x86_unroll_strlen;
extern const int x86_double_with_add, x86_partial_reg_stall, x86_movx;
extern const int x86_use_himode_fiop, x86_use_simode_fiop;
extern const int x86_use_mov0, x86_use_cltd, x86_use_xchgb;
extern const int x86_read_modify_write, x86_read_modify, x86_split_long_moves;
extern const int x86_promote_QImode, x86_single_stringop, x86_fast_prefix;
extern const int x86_himode_math, x86_qimode_math, x86_promote_qi_regs;
extern const int x86_promote_hi_regs, x86_integer_DFmode_moves;
extern const int x86_add_esp_4, x86_add_esp_8, x86_sub_esp_4, x86_sub_esp_8;
extern const int x86_partial_reg_dependency, x86_memory_mismatch_stall;
extern const int x86_accumulate_outgoing_args, x86_prologue_using_move;
extern const int x86_epilogue_using_move, x86_decompose_lea;
extern const int x86_arch_always_fancy_math_387, x86_shift1;
extern const int x86_sse_partial_reg_dependency, x86_sse_split_regs;
extern const int x86_sse_unaligned_move_optimal;
extern const int x86_sse_typeless_stores, x86_sse_load0_by_pxor;
extern const int x86_use_ffreep;
extern const int x86_inter_unit_moves, x86_schedule;
extern const int x86_use_bt;
extern const int x86_cmpxchg, x86_cmpxchg8b, x86_xadd;
extern const int x86_use_incdec;
extern const int x86_pad_returns;
extern const int x86_bswap;
extern const int x86_partial_flag_reg_stall;
extern int x86_prefetch_sse, x86_cmpxchg16b;
/* Feature tests against the various tunings. */
enum ix86_tune_indices {
  X86_TUNE_USE_LEAVE,
  X86_TUNE_PUSH_MEMORY,
  X86_TUNE_ZERO_EXTEND_WITH_AND,
  X86_TUNE_USE_BIT_TEST,
  X86_TUNE_UNROLL_STRLEN,
  X86_TUNE_DEEP_BRANCH_PREDICTION,
  X86_TUNE_BRANCH_PREDICTION_HINTS,
  X86_TUNE_DOUBLE_WITH_ADD,
  X86_TUNE_USE_SAHF, /* && !TARGET_64BIT */
  X86_TUNE_MOVX,
  X86_TUNE_PARTIAL_REG_STALL,
  X86_TUNE_PARTIAL_FLAG_REG_STALL,
  X86_TUNE_USE_HIMODE_FIOP,
  X86_TUNE_USE_SIMODE_FIOP,
  X86_TUNE_USE_MOV0,
  X86_TUNE_USE_CLTD,
  X86_TUNE_USE_XCHGB,
  X86_TUNE_SPLIT_LONG_MOVES,
  X86_TUNE_READ_MODIFY_WRITE,
  X86_TUNE_READ_MODIFY,
  X86_TUNE_PROMOTE_QIMODE,
  X86_TUNE_FAST_PREFIX,
  X86_TUNE_SINGLE_STRINGOP,
  X86_TUNE_QIMODE_MATH,
  X86_TUNE_HIMODE_MATH,
  X86_TUNE_PROMOTE_QI_REGS,
  X86_TUNE_PROMOTE_HI_REGS,
  X86_TUNE_ADD_ESP_4,
  X86_TUNE_ADD_ESP_8,
  X86_TUNE_SUB_ESP_4,
  X86_TUNE_SUB_ESP_8,
  X86_TUNE_INTEGER_DFMODE_MOVES,
  X86_TUNE_PARTIAL_REG_DEPENDENCY,
  X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY,
  X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL,
  X86_TUNE_SSE_SPLIT_REGS,
  X86_TUNE_SSE_TYPELESS_STORES,
  X86_TUNE_SSE_LOAD0_BY_PXOR,
  X86_TUNE_MEMORY_MISMATCH_STALL,
  X86_TUNE_PROLOGUE_USING_MOVE,
  X86_TUNE_EPILOGUE_USING_MOVE,
  X86_TUNE_SHIFT1,
  X86_TUNE_USE_FFREEP,
  X86_TUNE_INTER_UNIT_MOVES,
  X86_TUNE_FOUR_JUMP_LIMIT,
  X86_TUNE_SCHEDULE,
  X86_TUNE_USE_BT,
  X86_TUNE_USE_INCDEC,
  X86_TUNE_PAD_RETURNS,
  X86_TUNE_EXT_80387_CONSTANTS,
#define TARGET_USE_LEAVE (x86_use_leave & ix86_tune_mask)
#define TARGET_PUSH_MEMORY (x86_push_memory & ix86_tune_mask)
#define TARGET_ZERO_EXTEND_WITH_AND (x86_zero_extend_with_and & ix86_tune_mask)
#define TARGET_USE_BIT_TEST (x86_use_bit_test & ix86_tune_mask)
#define TARGET_UNROLL_STRLEN (x86_unroll_strlen & ix86_tune_mask)
/* For sane SSE instruction set generation we need fcomi instruction. It is
   safe to enable all CMOVE instructions. */
#define TARGET_CMOVE ((x86_cmove & ix86_arch_mask) || TARGET_SSE)
#define TARGET_FISTTP (TARGET_SSE3 && TARGET_80387)
#define TARGET_DEEP_BRANCH_PREDICTION (x86_deep_branch & ix86_tune_mask)
#define TARGET_BRANCH_PREDICTION_HINTS (x86_branch_hints & ix86_tune_mask)
#define TARGET_DOUBLE_WITH_ADD (x86_double_with_add & ix86_tune_mask)
#define TARGET_USE_SAHF ((x86_use_sahf & ix86_tune_mask) && !TARGET_64BIT)
#define TARGET_MOVX (x86_movx & ix86_tune_mask)
#define TARGET_PARTIAL_REG_STALL (x86_partial_reg_stall & ix86_tune_mask)
  X86_TUNE_LAST
};

extern unsigned int ix86_tune_features[X86_TUNE_LAST];

#define TARGET_USE_LEAVE ix86_tune_features[X86_TUNE_USE_LEAVE]
#define TARGET_PUSH_MEMORY ix86_tune_features[X86_TUNE_PUSH_MEMORY]
#define TARGET_ZERO_EXTEND_WITH_AND \
        ix86_tune_features[X86_TUNE_ZERO_EXTEND_WITH_AND]
#define TARGET_USE_BIT_TEST ix86_tune_features[X86_TUNE_USE_BIT_TEST]
#define TARGET_UNROLL_STRLEN ix86_tune_features[X86_TUNE_UNROLL_STRLEN]
#define TARGET_DEEP_BRANCH_PREDICTION \
        ix86_tune_features[X86_TUNE_DEEP_BRANCH_PREDICTION]
#define TARGET_BRANCH_PREDICTION_HINTS \
        ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS]
#define TARGET_DOUBLE_WITH_ADD ix86_tune_features[X86_TUNE_DOUBLE_WITH_ADD]
#define TARGET_USE_SAHF ix86_tune_features[X86_TUNE_USE_SAHF]
#define TARGET_MOVX ix86_tune_features[X86_TUNE_MOVX]
#define TARGET_PARTIAL_REG_STALL ix86_tune_features[X86_TUNE_PARTIAL_REG_STALL]
#define TARGET_PARTIAL_FLAG_REG_STALL \
        (x86_partial_flag_reg_stall & ix86_tune_mask)
#define TARGET_USE_HIMODE_FIOP (x86_use_himode_fiop & ix86_tune_mask)
#define TARGET_USE_SIMODE_FIOP (x86_use_simode_fiop & ix86_tune_mask)
#define TARGET_USE_MOV0 (x86_use_mov0 & ix86_tune_mask)
#define TARGET_USE_CLTD (x86_use_cltd & ix86_tune_mask)
#define TARGET_USE_XCHGB (x86_use_xchgb & ix86_tune_mask)
#define TARGET_SPLIT_LONG_MOVES (x86_split_long_moves & ix86_tune_mask)
#define TARGET_READ_MODIFY_WRITE (x86_read_modify_write & ix86_tune_mask)
#define TARGET_READ_MODIFY (x86_read_modify & ix86_tune_mask)
#define TARGET_PROMOTE_QImode (x86_promote_QImode & ix86_tune_mask)
#define TARGET_FAST_PREFIX (x86_fast_prefix & ix86_tune_mask)
#define TARGET_SINGLE_STRINGOP (x86_single_stringop & ix86_tune_mask)
#define TARGET_QIMODE_MATH (x86_qimode_math & ix86_tune_mask)
#define TARGET_HIMODE_MATH (x86_himode_math & ix86_tune_mask)
#define TARGET_PROMOTE_QI_REGS (x86_promote_qi_regs & ix86_tune_mask)
#define TARGET_PROMOTE_HI_REGS (x86_promote_hi_regs & ix86_tune_mask)
#define TARGET_ADD_ESP_4 (x86_add_esp_4 & ix86_tune_mask)
#define TARGET_ADD_ESP_8 (x86_add_esp_8 & ix86_tune_mask)
#define TARGET_SUB_ESP_4 (x86_sub_esp_4 & ix86_tune_mask)
#define TARGET_SUB_ESP_8 (x86_sub_esp_8 & ix86_tune_mask)
#define TARGET_INTEGER_DFMODE_MOVES (x86_integer_DFmode_moves & ix86_tune_mask)
        ix86_tune_features[X86_TUNE_PARTIAL_FLAG_REG_STALL]
#define TARGET_USE_HIMODE_FIOP ix86_tune_features[X86_TUNE_USE_HIMODE_FIOP]
#define TARGET_USE_SIMODE_FIOP ix86_tune_features[X86_TUNE_USE_SIMODE_FIOP]
#define TARGET_USE_MOV0 ix86_tune_features[X86_TUNE_USE_MOV0]
#define TARGET_USE_CLTD ix86_tune_features[X86_TUNE_USE_CLTD]
#define TARGET_USE_XCHGB ix86_tune_features[X86_TUNE_USE_XCHGB]
#define TARGET_SPLIT_LONG_MOVES ix86_tune_features[X86_TUNE_SPLIT_LONG_MOVES]
#define TARGET_READ_MODIFY_WRITE ix86_tune_features[X86_TUNE_READ_MODIFY_WRITE]
#define TARGET_READ_MODIFY ix86_tune_features[X86_TUNE_READ_MODIFY]
#define TARGET_PROMOTE_QImode ix86_tune_features[X86_TUNE_PROMOTE_QIMODE]
#define TARGET_FAST_PREFIX ix86_tune_features[X86_TUNE_FAST_PREFIX]
#define TARGET_SINGLE_STRINGOP ix86_tune_features[X86_TUNE_SINGLE_STRINGOP]
#define TARGET_QIMODE_MATH ix86_tune_features[X86_TUNE_QIMODE_MATH]
#define TARGET_HIMODE_MATH ix86_tune_features[X86_TUNE_HIMODE_MATH]
#define TARGET_PROMOTE_QI_REGS ix86_tune_features[X86_TUNE_PROMOTE_QI_REGS]
#define TARGET_PROMOTE_HI_REGS ix86_tune_features[X86_TUNE_PROMOTE_HI_REGS]
#define TARGET_ADD_ESP_4 ix86_tune_features[X86_TUNE_ADD_ESP_4]
#define TARGET_ADD_ESP_8 ix86_tune_features[X86_TUNE_ADD_ESP_8]
#define TARGET_SUB_ESP_4 ix86_tune_features[X86_TUNE_SUB_ESP_4]
#define TARGET_SUB_ESP_8 ix86_tune_features[X86_TUNE_SUB_ESP_8]
#define TARGET_INTEGER_DFMODE_MOVES \
        ix86_tune_features[X86_TUNE_INTEGER_DFMODE_MOVES]
#define TARGET_PARTIAL_REG_DEPENDENCY \
        (x86_partial_reg_dependency & ix86_tune_mask)
        ix86_tune_features[X86_TUNE_PARTIAL_REG_DEPENDENCY]
#define TARGET_SSE_PARTIAL_REG_DEPENDENCY \
        (x86_sse_partial_reg_dependency & ix86_tune_mask)
        ix86_tune_features[X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY]
#define TARGET_SSE_UNALIGNED_MOVE_OPTIMAL \
        (x86_sse_unaligned_move_optimal & ix86_tune_mask)
#define TARGET_SSE_SPLIT_REGS (x86_sse_split_regs & ix86_tune_mask)
#define TARGET_SSE_TYPELESS_STORES (x86_sse_typeless_stores & ix86_tune_mask)
#define TARGET_SSE_LOAD0_BY_PXOR (x86_sse_load0_by_pxor & ix86_tune_mask)
        ix86_tune_features[X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL]
#define TARGET_SSE_SPLIT_REGS ix86_tune_features[X86_TUNE_SSE_SPLIT_REGS]
#define TARGET_SSE_TYPELESS_STORES \
        ix86_tune_features[X86_TUNE_SSE_TYPELESS_STORES]
#define TARGET_SSE_LOAD0_BY_PXOR ix86_tune_features[X86_TUNE_SSE_LOAD0_BY_PXOR]
#define TARGET_MEMORY_MISMATCH_STALL \
        (x86_memory_mismatch_stall & ix86_tune_mask)
#define TARGET_PROLOGUE_USING_MOVE (x86_prologue_using_move & ix86_tune_mask)
#define TARGET_EPILOGUE_USING_MOVE (x86_epilogue_using_move & ix86_tune_mask)
#define TARGET_PREFETCH_SSE (x86_prefetch_sse)
#define TARGET_SHIFT1 (x86_shift1 & ix86_tune_mask)
#define TARGET_USE_FFREEP (x86_use_ffreep & ix86_tune_mask)
#define TARGET_INTER_UNIT_MOVES (x86_inter_unit_moves & ix86_tune_mask)
#define TARGET_FOUR_JUMP_LIMIT (x86_four_jump_limit & ix86_tune_mask)
#define TARGET_SCHEDULE (x86_schedule & ix86_tune_mask)
#define TARGET_USE_BT (x86_use_bt & ix86_tune_mask)
#define TARGET_USE_INCDEC (x86_use_incdec & ix86_tune_mask)
#define TARGET_PAD_RETURNS (x86_pad_returns & ix86_tune_mask)
        ix86_tune_features[X86_TUNE_MEMORY_MISMATCH_STALL]
#define TARGET_PROLOGUE_USING_MOVE \
        ix86_tune_features[X86_TUNE_PROLOGUE_USING_MOVE]
#define TARGET_EPILOGUE_USING_MOVE \
        ix86_tune_features[X86_TUNE_EPILOGUE_USING_MOVE]
#define TARGET_SHIFT1 ix86_tune_features[X86_TUNE_SHIFT1]
#define TARGET_USE_FFREEP ix86_tune_features[X86_TUNE_USE_FFREEP]
#define TARGET_INTER_UNIT_MOVES ix86_tune_features[X86_TUNE_INTER_UNIT_MOVES]
#define TARGET_FOUR_JUMP_LIMIT ix86_tune_features[X86_TUNE_FOUR_JUMP_LIMIT]
#define TARGET_SCHEDULE ix86_tune_features[X86_TUNE_SCHEDULE]
#define TARGET_USE_BT ix86_tune_features[X86_TUNE_USE_BT]
#define TARGET_USE_INCDEC ix86_tune_features[X86_TUNE_USE_INCDEC]
#define TARGET_PAD_RETURNS ix86_tune_features[X86_TUNE_PAD_RETURNS]
#define TARGET_EXT_80387_CONSTANTS \
        ix86_tune_features[X86_TUNE_EXT_80387_CONSTANTS]

/* Feature tests against the various architecture variations. */
enum ix86_arch_indices {
  X86_ARCH_CMOVE, /* || TARGET_SSE */
  X86_ARCH_CMPXCHG,
  X86_ARCH_CMPXCHG8B,
  X86_ARCH_XADD,
  X86_ARCH_BSWAP,

  X86_ARCH_LAST
};

extern unsigned int ix86_arch_features[X86_ARCH_LAST];

#define TARGET_CMOVE ix86_arch_features[X86_ARCH_CMOVE]
#define TARGET_CMPXCHG ix86_arch_features[X86_ARCH_CMPXCHG]
#define TARGET_CMPXCHG8B ix86_arch_features[X86_ARCH_CMPXCHG8B]
#define TARGET_XADD ix86_arch_features[X86_ARCH_XADD]
#define TARGET_BSWAP ix86_arch_features[X86_ARCH_BSWAP]

#define TARGET_FISTTP (TARGET_SSE3 && TARGET_80387)

extern int x86_prefetch_sse;
#define TARGET_PREFETCH_SSE x86_prefetch_sse

extern int x86_cmpxchg16b;
#define TARGET_CMPXCHG16B x86_cmpxchg16b

#define ASSEMBLER_DIALECT (ix86_asm_dialect)

#define TARGET_SSE_MATH ((ix86_fpmath & FPMATH_SSE) != 0)
#define TARGET_MIX_SSE_I387 ((ix86_fpmath & FPMATH_SSE) \
        && (ix86_fpmath & FPMATH_387))
#define TARGET_MIX_SSE_I387 \
        ((ix86_fpmath & (FPMATH_SSE | FPMATH_387)) == (FPMATH_SSE | FPMATH_387))

#define TARGET_GNU_TLS (ix86_tls_dialect == TLS_DIALECT_GNU)
#define TARGET_GNU2_TLS (ix86_tls_dialect == TLS_DIALECT_GNU2)
#define TARGET_ANY_GNU_TLS (TARGET_GNU_TLS || TARGET_GNU2_TLS)
#define TARGET_SUN_TLS (ix86_tls_dialect == TLS_DIALECT_SUN)

#define TARGET_CMPXCHG (x86_cmpxchg & ix86_arch_mask)
#define TARGET_CMPXCHG8B (x86_cmpxchg8b & ix86_arch_mask)
#define TARGET_CMPXCHG16B (x86_cmpxchg16b)
#define TARGET_XADD (x86_xadd & ix86_arch_mask)
#define TARGET_BSWAP (x86_bswap & ix86_arch_mask)

#ifndef TARGET_64BIT_DEFAULT
#define TARGET_64BIT_DEFAULT 0
#endif
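With the declarations above, the per-feature plumbing in i386.h becomes uniform. As a hypothetical example of what one more tuning knob would involve under the new scheme — X86_TUNE_EXAMPLE_KNOB, its mask choice, and TARGET_EXAMPLE_KNOB are invented for illustration and are not part of this patch:

/* 1. New enumerator, kept before X86_TUNE_LAST so the array size follows.  */
enum ix86_tune_indices {
  /* ... existing entries ... */
  X86_TUNE_EXAMPLE_KNOB,

  X86_TUNE_LAST
};

/* 2. Matching initializer entry in ix86_tune_features[] in i386.c,
      listing the CPUs the knob is enabled for.  */
unsigned int ix86_tune_features[X86_TUNE_LAST] = {
  /* ... existing entries ... */

  /* X86_TUNE_EXAMPLE_KNOB */
  m_CORE2 | m_GENERIC,
};

/* 3. Accessor macro; override_options () masks the array once, so no
      ix86_tune_mask test is needed at the use sites.  */
#define TARGET_EXAMPLE_KNOB ix86_tune_features[X86_TUNE_EXAMPLE_KNOB]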
@@ -2132,10 +2187,7 @@ enum processor_type
};

extern enum processor_type ix86_tune;
extern int ix86_tune_mask;

extern enum processor_type ix86_arch;
extern int ix86_arch_mask;

enum fpmath_unit
{