mirror of
https://gcc.gnu.org/git/gcc.git
synced 2024-11-30 07:14:09 +08:00
i386: Zhaoxin yongfeng enablement
Enable -march/-mtune=yongfeng. Costs and tunings are set according to the characteristics of the processor. Add a new .md file to describe yongfeng processor. gcc/ChangeLog: * common/config/i386/cpuinfo.h (get_zhaoxin_cpu): Recognize yongfeng. * common/config/i386/i386-common.cc: Add yongfeng. * common/config/i386/i386-cpuinfo.h (enum processor_subtypes): Add ZHAOXIN_FAM7H_YONGFENG. * config.gcc: Add yongfeng. * config/i386/driver-i386.cc (host_detect_local_cpu): Let -march=native recognize yongfeng processors. * config/i386/i386-c.cc (ix86_target_macros_internal): Add yongfeng. * config/i386/i386-options.cc (m_YONGFENG): New definition. (m_ZHAOXIN): Ditto. * config/i386/i386.h (enum processor_type): Add PROCESSOR_YONGFENG. * config/i386/i386.md: Add yongfeng. * config/i386/lujiazui.md: Fix typo. * config/i386/x86-tune-costs.h (struct processor_costs): Add yongfeng costs. * config/i386/x86-tune-sched.cc (ix86_issue_rate): Add yongfeng. (ix86_adjust_cost): Ditto. * config/i386/x86-tune.def (X86_TUNE_SCHEDULE): Replace m_LUJIAZUI with m_ZHAOXIN. (X86_TUNE_PARTIAL_REG_DEPENDENCY): Ditto. (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY): Ditto. (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY): Ditto. (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY): Ditto. (X86_TUNE_MOVX): Ditto. (X86_TUNE_MEMORY_MISMATCH_STALL): Ditto. (X86_TUNE_FUSE_CMP_AND_BRANCH_32): Ditto. (X86_TUNE_FUSE_CMP_AND_BRANCH_64): Ditto. (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS): Ditto. (X86_TUNE_FUSE_ALU_AND_BRANCH): Ditto. (X86_TUNE_ACCUMULATE_OUTGOING_ARGS): Ditto. (X86_TUNE_USE_LEAVE): Ditto. (X86_TUNE_PUSH_MEMORY): Ditto. (X86_TUNE_LCP_STALL): Ditto. (X86_TUNE_INTEGER_DFMODE_MOVES): Ditto. (X86_TUNE_OPT_AGU): Ditto. (X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB): Ditto. (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES): Ditto. (X86_TUNE_USE_SAHF): Ditto. (X86_TUNE_USE_BT): Ditto. (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI): Ditto. (X86_TUNE_ONE_IF_CONV_INSN): Ditto. (X86_TUNE_AVOID_MFENCE): Ditto. (X86_TUNE_EXPAND_ABS): Ditto. 
(X86_TUNE_USE_SIMODE_FIOP): Ditto. (X86_TUNE_USE_FFREEP): Ditto. (X86_TUNE_EXT_80387_CONSTANTS): Ditto. (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL): Ditto. (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL): Ditto. (X86_TUNE_SSE_TYPELESS_STORES): Ditto. (X86_TUNE_SSE_LOAD0_BY_PXOR): Ditto. (X86_TUNE_USE_GATHER_2PARTS): Add m_YONGFENG. (X86_TUNE_USE_GATHER_4PARTS): Ditto. (X86_TUNE_USE_GATHER_8PARTS): Ditto. (X86_TUNE_AVOID_128FMA_CHAINS): Ditto. * doc/extend.texi: Add details about yongfeng. * doc/invoke.texi: Ditto. * config/i386/yongfeng.md: New file to describe yongfeng processor. gcc/testsuite/ChangeLog: * g++.target/i386/mv32.C: Handle new -march. * gcc.target/i386/funcspec-56.inc: Ditto.
This commit is contained in:
parent
6504b4a498
commit
94c0b26f45
@ -663,6 +663,12 @@ get_zhaoxin_cpu (struct __processor_model *cpu_model,
|
||||
reset_cpu_feature (cpu_model, cpu_features2, FEATURE_F16C);
|
||||
cpu_model->__cpu_subtype = ZHAOXIN_FAM7H_LUJIAZUI;
|
||||
}
|
||||
else if (model >= 0x5b)
|
||||
{
|
||||
cpu = "yongfeng";
|
||||
CHECK___builtin_cpu_is ("yongfeng");
|
||||
cpu_model->__cpu_subtype = ZHAOXIN_FAM7H_YONGFENG;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
|
@ -2098,6 +2098,7 @@ const char *const processor_names[] =
|
||||
"pantherlake",
|
||||
"intel",
|
||||
"lujiazui",
|
||||
"yongfeng",
|
||||
"geode",
|
||||
"k6",
|
||||
"athlon",
|
||||
@ -2305,12 +2306,11 @@ const pta processor_alias_table[] =
|
||||
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
|
||||
| PTA_SSSE3 | PTA_SSE4_1 | PTA_FXSR, 0, P_NONE},
|
||||
{"lujiazui", PROCESSOR_LUJIAZUI, CPU_LUJIAZUI,
|
||||
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
|
||||
| PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
|
||||
| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_BMI | PTA_BMI2
|
||||
| PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE
|
||||
| PTA_RDRND | PTA_MOVBE | PTA_ADX | PTA_RDSEED | PTA_POPCNT,
|
||||
PTA_LUJIAZUI,
|
||||
M_CPU_SUBTYPE (ZHAOXIN_FAM7H_LUJIAZUI), P_NONE},
|
||||
{"yongfeng", PROCESSOR_YONGFENG, CPU_YONGFENG,
|
||||
PTA_YONGFENG,
|
||||
M_CPU_SUBTYPE (ZHAOXIN_FAM7H_YONGFENG), P_NONE},
|
||||
{"k8", PROCESSOR_K8, CPU_K8,
|
||||
PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
|
||||
| PTA_SSE2 | PTA_NO_SAHF | PTA_FXSR, 0, P_NONE},
|
||||
|
@ -103,6 +103,7 @@ enum processor_subtypes
|
||||
INTEL_COREI7_ARROWLAKE,
|
||||
INTEL_COREI7_ARROWLAKE_S,
|
||||
INTEL_COREI7_PANTHERLAKE,
|
||||
ZHAOXIN_FAM7H_YONGFENG,
|
||||
CPU_SUBTYPE_MAX
|
||||
};
|
||||
|
||||
|
@ -706,7 +706,7 @@ slm nehalem westmere sandybridge ivybridge haswell broadwell bonnell \
|
||||
silvermont knl knm skylake-avx512 cannonlake icelake-client icelake-server \
|
||||
skylake goldmont goldmont-plus tremont cascadelake tigerlake cooperlake \
|
||||
sapphirerapids alderlake rocketlake eden-x2 nano nano-1000 nano-2000 nano-3000 \
|
||||
nano-x2 eden-x4 nano-x4 lujiazui x86-64 x86-64-v2 x86-64-v3 x86-64-v4 \
|
||||
nano-x2 eden-x4 nano-x4 lujiazui yongfeng x86-64 x86-64-v2 x86-64-v3 x86-64-v4 \
|
||||
sierraforest graniterapids graniterapids-d grandridge arrowlake arrowlake-s \
|
||||
clearwaterforest pantherlake native"
|
||||
|
||||
@ -3811,6 +3811,10 @@ case ${target} in
|
||||
arch=lujiazui
|
||||
cpu=lujiazui
|
||||
;;
|
||||
yongfeng-*)
|
||||
arch=yongfeng
|
||||
cpu=yongfeng
|
||||
;;
|
||||
pentium2-*)
|
||||
arch=pentium2
|
||||
cpu=pentium2
|
||||
@ -3924,10 +3928,14 @@ case ${target} in
|
||||
arch=k8
|
||||
cpu=k8
|
||||
;;
|
||||
lujiazui-*)
|
||||
lujiazui-*)
|
||||
arch=lujiazui
|
||||
cpu=lujiazui
|
||||
;;
|
||||
yongfeng-*)
|
||||
arch=yongfeng
|
||||
cpu=yongfeng
|
||||
;;
|
||||
nocona-*)
|
||||
arch=nocona
|
||||
cpu=nocona
|
||||
|
@ -530,6 +530,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
|
||||
case 7:
|
||||
if (model == 0x3b)
|
||||
processor = PROCESSOR_LUJIAZUI;
|
||||
else if (model >= 0x5b)
|
||||
processor = PROCESSOR_YONGFENG;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
@ -817,6 +819,9 @@ const char *host_detect_local_cpu (int argc, const char **argv)
|
||||
case PROCESSOR_LUJIAZUI:
|
||||
cpu = "lujiazui";
|
||||
break;
|
||||
case PROCESSOR_YONGFENG:
|
||||
cpu = "yongfeng";
|
||||
break;
|
||||
|
||||
default:
|
||||
/* Use something reasonable. */
|
||||
|
@ -148,6 +148,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
||||
def_or_undef (parse_in, "__lujiazui");
|
||||
def_or_undef (parse_in, "__lujiazui__");
|
||||
break;
|
||||
case PROCESSOR_YONGFENG:
|
||||
def_or_undef (parse_in, "__yongfeng");
|
||||
def_or_undef (parse_in, "__yongfeng__");
|
||||
break;
|
||||
case PROCESSOR_PENTIUM4:
|
||||
def_or_undef (parse_in, "__pentium4");
|
||||
def_or_undef (parse_in, "__pentium4__");
|
||||
@ -379,6 +383,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
|
||||
case PROCESSOR_LUJIAZUI:
|
||||
def_or_undef (parse_in, "__tune_lujiazui__");
|
||||
break;
|
||||
case PROCESSOR_YONGFENG:
|
||||
def_or_undef (parse_in, "__tune_yongfeng__");
|
||||
break;
|
||||
case PROCESSOR_PENTIUM4:
|
||||
def_or_undef (parse_in, "__tune_pentium4__");
|
||||
break;
|
||||
|
@ -154,6 +154,8 @@ along with GCC; see the file COPYING3. If not see
|
||||
| m_TIGERLAKE | m_COOPERLAKE | m_ROCKETLAKE)
|
||||
|
||||
#define m_LUJIAZUI (HOST_WIDE_INT_1U<<PROCESSOR_LUJIAZUI)
|
||||
#define m_YONGFENG (HOST_WIDE_INT_1U<<PROCESSOR_YONGFENG)
|
||||
#define m_ZHAOXIN (m_LUJIAZUI | m_YONGFENG)
|
||||
|
||||
#define m_GEODE (HOST_WIDE_INT_1U<<PROCESSOR_GEODE)
|
||||
#define m_K6 (HOST_WIDE_INT_1U<<PROCESSOR_K6)
|
||||
@ -792,6 +794,7 @@ static const struct processor_costs *processor_cost_table[] =
|
||||
&alderlake_cost,
|
||||
&intel_cost,
|
||||
&lujiazui_cost,
|
||||
&yongfeng_cost,
|
||||
&geode_cost,
|
||||
&k6_cost,
|
||||
&athlon_cost,
|
||||
|
@ -2297,6 +2297,7 @@ enum processor_type
|
||||
PROCESSOR_PANTHERLAKE,
|
||||
PROCESSOR_INTEL,
|
||||
PROCESSOR_LUJIAZUI,
|
||||
PROCESSOR_YONGFENG,
|
||||
PROCESSOR_GEODE,
|
||||
PROCESSOR_K6,
|
||||
PROCESSOR_ATHLON,
|
||||
@ -2435,6 +2436,14 @@ constexpr wide_int_bitmask PTA_ZNVER4 = PTA_ZNVER3 | PTA_AVX512F | PTA_AVX512DQ
|
||||
| PTA_AVX512BF16 | PTA_AVX512VBMI | PTA_AVX512VBMI2 | PTA_GFNI
|
||||
| PTA_AVX512VNNI | PTA_AVX512BITALG | PTA_AVX512VPOPCNTDQ;
|
||||
|
||||
constexpr wide_int_bitmask PTA_LUJIAZUI = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
|
||||
| PTA_SSE3 | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
|
||||
| PTA_PCLMUL | PTA_BMI | PTA_BMI2 | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT
|
||||
| PTA_FSGSBASE | PTA_RDRND | PTA_MOVBE | PTA_ADX | PTA_RDSEED | PTA_POPCNT;
|
||||
|
||||
constexpr wide_int_bitmask PTA_YONGFENG = PTA_LUJIAZUI | PTA_AVX | PTA_AVX2 | PTA_F16C
|
||||
| PTA_FMA | PTA_SHA | PTA_LZCNT;
|
||||
|
||||
#ifndef GENERATOR_FILE
|
||||
|
||||
#include "insn-attr-common.h"
|
||||
|
@ -508,7 +508,7 @@
|
||||
|
||||
;; Processor type.
|
||||
(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,nehalem,
|
||||
atom,slm,glm,haswell,generic,lujiazui,amdfam10,bdver1,
|
||||
atom,slm,glm,haswell,generic,lujiazui,yongfeng,amdfam10,bdver1,
|
||||
bdver2,bdver3,bdver4,btver2,znver1,znver2,znver3,znver4"
|
||||
(const (symbol_ref "ix86_schedule")))
|
||||
|
||||
@ -1382,6 +1382,7 @@
|
||||
(include "core2.md")
|
||||
(include "haswell.md")
|
||||
(include "lujiazui.md")
|
||||
(include "yongfeng.md")
|
||||
|
||||
|
||||
;; Operand and operator predicates and constraints
|
||||
|
@ -137,7 +137,7 @@
|
||||
(define_insn_reservation "lua_lea" 1
|
||||
(and (eq_attr "cpu" "lujiazui")
|
||||
(eq_attr "type" "lea"))
|
||||
"hsw_decodern,lua_p45")
|
||||
"lua_decodern,lua_p45")
|
||||
|
||||
(define_insn_reservation "lua_shift_rotate" 1
|
||||
(and (eq_attr "cpu" "lujiazui")
|
||||
|
@ -3393,6 +3393,122 @@ struct processor_costs lujiazui_cost = {
|
||||
2, /* Small unroll factor. */
|
||||
};
|
||||
|
||||
/* yongfeng_cost should produce code tuned for ZHAOXIN yongfeng CPU. */
|
||||
static stringop_algs yongfeng_memcpy[2] = {
|
||||
{libcall, {{6, unrolled_loop, true}, {256, unrolled_loop, false},
|
||||
{-1, libcall, false}}},
|
||||
{libcall, {{8, loop, false}, {512, unrolled_loop, false},
|
||||
{-1, libcall, false}}}};
|
||||
static stringop_algs yongfeng_memset[2] = {
|
||||
{libcall, {{6, loop_1_byte, false}, {128, loop, false},
|
||||
{-1, libcall, false}}},
|
||||
{libcall, {{2, rep_prefix_4_byte, false}, {64, loop, false},
|
||||
{1024, vector_loop, false},
|
||||
{-1, libcall, false}}}};
|
||||
static const
|
||||
struct processor_costs yongfeng_cost = {
|
||||
{
|
||||
/* Start of register allocator costs. integer->integer move cost is 2. */
|
||||
8, /* cost for loading QImode using movzbl. */
|
||||
{8, 8, 8}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
Relative to reg-reg move (2). */
|
||||
{8, 8, 8}, /* cost of storing integer registers. */
|
||||
2, /* cost of reg,reg fld/fst. */
|
||||
{8, 8, 8}, /* cost of loading fp registers
|
||||
in SFmode, DFmode and XFmode. */
|
||||
{8, 8, 8}, /* cost of storing fp registers
|
||||
in SFmode, DFmode and XFmode. */
|
||||
2, /* cost of moving MMX register. */
|
||||
{8, 8}, /* cost of loading MMX registers
|
||||
in SImode and DImode. */
|
||||
{8, 8}, /* cost of storing MMX registers
|
||||
in SImode and DImode. */
|
||||
2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */
|
||||
{8, 8, 8, 10, 15}, /* cost of loading SSE registers
|
||||
in 32,64,128,256 and 512-bit. */
|
||||
{8, 8, 8, 10, 15}, /* cost of storing SSE registers
|
||||
in 32,64,128,256 and 512-bit. */
|
||||
8, 8, /* SSE->integer and integer->SSE moves. */
|
||||
8, 8, /* mask->integer and integer->mask moves. */
|
||||
{8, 8, 8}, /* cost of loading mask register
|
||||
in QImode, HImode, SImode. */
|
||||
{8, 8, 8}, /* cost of storing mask register
|
||||
in QImode, HImode, SImode. */
|
||||
2, /* cost of moving mask register. */
|
||||
/* End of register allocator costs. */
|
||||
},
|
||||
|
||||
COSTS_N_INSNS (1), /* cost of an add instruction. */
|
||||
COSTS_N_INSNS (1), /* cost of a lea instruction. */
|
||||
COSTS_N_INSNS (1), /* variable shift costs. */
|
||||
COSTS_N_INSNS (1), /* constant shift costs. */
|
||||
{COSTS_N_INSNS (2), /* cost of starting multiply for QI. */
|
||||
COSTS_N_INSNS (3), /* HI. */
|
||||
COSTS_N_INSNS (2), /* SI. */
|
||||
COSTS_N_INSNS (2), /* DI. */
|
||||
COSTS_N_INSNS (3)}, /* other. */
|
||||
0, /* cost of multiply per each bit set. */
|
||||
{COSTS_N_INSNS (8), /* cost of a divide/mod for QI. */
|
||||
COSTS_N_INSNS (9), /* HI. */
|
||||
COSTS_N_INSNS (8), /* SI. */
|
||||
COSTS_N_INSNS (41), /* DI. */
|
||||
COSTS_N_INSNS (41)}, /* other. */
|
||||
COSTS_N_INSNS (1), /* cost of movsx. */
|
||||
COSTS_N_INSNS (1), /* cost of movzx. */
|
||||
8, /* "large" insn. */
|
||||
17, /* MOVE_RATIO. */
|
||||
6, /* CLEAR_RATIO. */
|
||||
{8, 8, 8}, /* cost of loading integer registers
|
||||
in QImode, HImode and SImode.
|
||||
Relative to reg-reg move (2). */
|
||||
{8, 8, 8}, /* cost of storing integer registers. */
|
||||
{8, 8, 8, 12, 15}, /* cost of loading SSE register
|
||||
in 32bit, 64bit, 128bit, 256bit and 512bit. */
|
||||
{8, 8, 8, 12, 15}, /* cost of storing SSE register
|
||||
in 32bit, 64bit, 128bit, 256bit and 512bit. */
|
||||
{8, 8, 8, 12, 15}, /* cost of unaligned loads. */
|
||||
{8, 8, 8, 12, 15}, /* cost of unaligned stores. */
|
||||
2, 3, 4, /* cost of moving XMM,YMM,ZMM register. */
|
||||
8, /* cost of moving SSE register to integer. */
|
||||
18, 6, /* Gather load static, per_elt. */
|
||||
18, 6, /* Gather store static, per_elt. */
|
||||
32, /* size of l1 cache. */
|
||||
256, /* size of l2 cache. */
|
||||
64, /* size of prefetch block. */
|
||||
12, /* number of parallel prefetches. */
|
||||
3, /* Branch cost. */
|
||||
COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
|
||||
COSTS_N_INSNS (3), /* cost of FMUL instruction. */
|
||||
COSTS_N_INSNS (14), /* cost of FDIV instruction. */
|
||||
COSTS_N_INSNS (1), /* cost of FABS instruction. */
|
||||
COSTS_N_INSNS (1), /* cost of FCHS instruction. */
|
||||
COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
|
||||
|
||||
COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
|
||||
COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
|
||||
COSTS_N_INSNS (3), /* cost of MULSS instruction. */
|
||||
COSTS_N_INSNS (3), /* cost of MULSD instruction. */
|
||||
COSTS_N_INSNS (5), /* cost of FMA SS instruction. */
|
||||
COSTS_N_INSNS (5), /* cost of FMA SD instruction. */
|
||||
COSTS_N_INSNS (10), /* cost of DIVSS instruction. */
|
||||
COSTS_N_INSNS (14), /* cost of DIVSD instruction. */
|
||||
COSTS_N_INSNS (20), /* cost of SQRTSS instruction. */
|
||||
COSTS_N_INSNS (35), /* cost of SQRTSD instruction. */
|
||||
4, 4, 4, 4, /* reassoc int, fp, vec_int, vec_fp. */
|
||||
yongfeng_memcpy,
|
||||
yongfeng_memset,
|
||||
COSTS_N_INSNS (3), /* cond_taken_branch_cost. */
|
||||
COSTS_N_INSNS (1), /* cond_not_taken_branch_cost. */
|
||||
"16:11:8", /* Loop alignment. */
|
||||
"16:11:8", /* Jump alignment. */
|
||||
"0:0:8", /* Label alignment. */
|
||||
"16", /* Func alignment. */
|
||||
4, /* Small unroll limit. */
|
||||
2, /* Small unroll factor. */
|
||||
};
|
||||
|
||||
|
||||
/* Generic should produce code tuned for Core-i7 (and newer chips)
|
||||
and btver1 (and newer chips). */
|
||||
|
||||
|
@ -79,6 +79,7 @@ ix86_issue_rate (void)
|
||||
case PROCESSOR_CASCADELAKE:
|
||||
case PROCESSOR_CANNONLAKE:
|
||||
case PROCESSOR_ALDERLAKE:
|
||||
case PROCESSOR_YONGFENG:
|
||||
case PROCESSOR_GENERIC:
|
||||
return 4;
|
||||
|
||||
@ -384,7 +385,6 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
|
||||
|
||||
case PROCESSOR_ATHLON:
|
||||
case PROCESSOR_K8:
|
||||
case PROCESSOR_LUJIAZUI:
|
||||
memory = get_attr_memory (insn);
|
||||
|
||||
/* Show ability of reorder buffer to hide latency of load by executing
|
||||
@ -445,6 +445,31 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
|
||||
}
|
||||
break;
|
||||
|
||||
case PROCESSOR_YONGFENG:
|
||||
/* The stack engine allows push and pop instructions to execute in parallel. */
|
||||
if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
|
||||
&& (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
|
||||
return 0;
|
||||
/* FALLTHRU */
|
||||
|
||||
case PROCESSOR_LUJIAZUI:
|
||||
memory = get_attr_memory (insn);
|
||||
|
||||
/* Show ability of reorder buffer to hide latency of load by executing
|
||||
in parallel with previous instruction in case
|
||||
previous instruction is not needed to compute the address. */
|
||||
if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
|
||||
&& !ix86_agi_dependent (dep_insn, insn))
|
||||
{
|
||||
int loadcost = 4;
|
||||
|
||||
if (cost >= loadcost)
|
||||
cost -= loadcost;
|
||||
else
|
||||
cost = 0;
|
||||
}
|
||||
break;
|
||||
|
||||
case PROCESSOR_CORE2:
|
||||
case PROCESSOR_NEHALEM:
|
||||
case PROCESSOR_SANDYBRIDGE:
|
||||
|
@ -41,7 +41,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
|
||||
/* X86_TUNE_SCHEDULE: Enable scheduling. */
|
||||
DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
|
||||
m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL | m_SILVERMONT
|
||||
| m_INTEL | m_KNL | m_KNM | m_K6_GEODE | m_AMD_MULTIPLE | m_LUJIAZUI
|
||||
| m_INTEL | m_KNL | m_KNM | m_K6_GEODE | m_AMD_MULTIPLE | m_ZHAOXIN
|
||||
| m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID
|
||||
| m_CORE_ATOM | m_GENERIC)
|
||||
|
||||
@ -52,7 +52,7 @@ DEF_TUNE (X86_TUNE_SCHEDULE, "schedule",
|
||||
DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency",
|
||||
m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2
|
||||
| m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL
|
||||
| m_KNL | m_KNM | m_AMD_MULTIPLE | m_LUJIAZUI | m_TREMONT
|
||||
| m_KNL | m_KNM | m_AMD_MULTIPLE | m_ZHAOXIN | m_TREMONT
|
||||
| m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: This knob promotes all store
|
||||
@ -63,7 +63,7 @@ DEF_TUNE (X86_TUNE_PARTIAL_REG_DEPENDENCY, "partial_reg_dependency",
|
||||
that can be partly masked by careful scheduling of moves. */
|
||||
DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency",
|
||||
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
|
||||
| m_BDVER | m_ZNVER | m_LUJIAZUI | m_TREMONT | m_CORE_HYBRID
|
||||
| m_BDVER | m_ZNVER | m_ZHAOXIN | m_TREMONT | m_CORE_HYBRID
|
||||
| m_CORE_ATOM | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY: This knob avoids
|
||||
@ -72,7 +72,7 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY, "sse_partial_reg_dependency",
|
||||
DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY,
|
||||
"sse_partial_reg_fp_converts_dependency",
|
||||
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
|
||||
| m_BDVER | m_ZNVER | m_LUJIAZUI | m_CORE_HYBRID | m_CORE_ATOM
|
||||
| m_BDVER | m_ZNVER | m_ZHAOXIN | m_CORE_HYBRID | m_CORE_ATOM
|
||||
| m_GENERIC)
|
||||
|
||||
/* X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY: This knob avoids partial
|
||||
@ -80,7 +80,7 @@ DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_FP_CONVERTS_DEPENDENCY,
|
||||
DEF_TUNE (X86_TUNE_SSE_PARTIAL_REG_CONVERTS_DEPENDENCY,
|
||||
"sse_partial_reg_converts_dependency",
|
||||
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_AMDFAM10
|
||||
| m_BDVER | m_ZNVER | m_LUJIAZUI | m_CORE_HYBRID | m_CORE_ATOM
|
||||
| m_BDVER | m_ZNVER | m_ZHAOXIN | m_CORE_HYBRID | m_CORE_ATOM
|
||||
| m_GENERIC)
|
||||
|
||||
/* X86_TUNE_DEST_FALSE_DEP_FOR_GLC: This knob inserts zero-idiom before
|
||||
@ -113,7 +113,7 @@ DEF_TUNE (X86_TUNE_PARTIAL_FLAG_REG_STALL, "partial_flag_reg_stall",
|
||||
DEF_TUNE (X86_TUNE_MOVX, "movx",
|
||||
m_PPRO | m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE
|
||||
| m_BONNELL | m_SILVERMONT | m_GOLDMONT | m_KNL | m_KNM | m_INTEL
|
||||
| m_GOLDMONT_PLUS | m_GEODE | m_AMD_MULTIPLE | m_LUJIAZUI
|
||||
| m_GOLDMONT_PLUS | m_GEODE | m_AMD_MULTIPLE | m_ZHAOXIN
|
||||
| m_CORE_AVX2 | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_MEMORY_MISMATCH_STALL: Avoid partial stores that are followed by
|
||||
@ -121,31 +121,31 @@ DEF_TUNE (X86_TUNE_MOVX, "movx",
|
||||
DEF_TUNE (X86_TUNE_MEMORY_MISMATCH_STALL, "memory_mismatch_stall",
|
||||
m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_INTEL
|
||||
| m_KNL | m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS | m_AMD_MULTIPLE
|
||||
| m_LUJIAZUI | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
|
||||
| m_ZHAOXIN | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_FUSE_CMP_AND_BRANCH_32: Fuse compare with a subsequent
|
||||
conditional jump instruction for 32 bit TARGET. */
|
||||
DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_32, "fuse_cmp_and_branch_32",
|
||||
m_CORE_ALL | m_BDVER | m_ZNVER | m_LUJIAZUI | m_GENERIC)
|
||||
m_CORE_ALL | m_BDVER | m_ZNVER | m_ZHAOXIN | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_FUSE_CMP_AND_BRANCH_64: Fuse compare with a subsequent
|
||||
conditional jump instruction for TARGET_64BIT. */
|
||||
DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_64, "fuse_cmp_and_branch_64",
|
||||
m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_BDVER
|
||||
| m_ZNVER | m_LUJIAZUI | m_GENERIC)
|
||||
| m_ZNVER | m_ZHAOXIN | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS: Fuse compare with a
|
||||
subsequent conditional jump instruction when the condition jump
|
||||
check sign flag (SF) or overflow flag (OF). */
|
||||
DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags",
|
||||
m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_BDVER
|
||||
| m_ZNVER | m_LUJIAZUI | m_GENERIC)
|
||||
| m_ZNVER | m_ZHAOXIN | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_FUSE_ALU_AND_BRANCH: Fuse alu with a subsequent conditional
|
||||
jump instruction when the alu instruction produces the CCFLAG consumed by
|
||||
the conditional jump instruction. */
|
||||
DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch",
|
||||
m_SANDYBRIDGE | m_CORE_AVX2 | m_LUJIAZUI | m_GENERIC)
|
||||
m_SANDYBRIDGE | m_CORE_AVX2 | m_ZHAOXIN | m_GENERIC)
|
||||
|
||||
|
||||
/*****************************************************************************/
|
||||
@ -162,7 +162,7 @@ DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch",
|
||||
|
||||
DEF_TUNE (X86_TUNE_ACCUMULATE_OUTGOING_ARGS, "accumulate_outgoing_args",
|
||||
m_PPRO | m_P4_NOCONA | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
|
||||
| m_GOLDMONT | m_GOLDMONT_PLUS | m_ATHLON_K8 | m_LUJIAZUI)
|
||||
| m_GOLDMONT | m_GOLDMONT_PLUS | m_ATHLON_K8 | m_ZHAOXIN)
|
||||
|
||||
/* X86_TUNE_PROLOGUE_USING_MOVE: Do not use push/pop in prologues that are
|
||||
considered on critical path. */
|
||||
@ -176,7 +176,7 @@ DEF_TUNE (X86_TUNE_EPILOGUE_USING_MOVE, "epilogue_using_move",
|
||||
|
||||
/* X86_TUNE_USE_LEAVE: Use "leave" instruction in epilogues where it fits. */
|
||||
DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave",
|
||||
m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_LUJIAZUI
|
||||
m_386 | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE | m_ZHAOXIN
|
||||
| m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_PUSH_MEMORY: Enable generation of "push mem" instructions.
|
||||
@ -184,7 +184,7 @@ DEF_TUNE (X86_TUNE_USE_LEAVE, "use_leave",
|
||||
and push instructions. */
|
||||
DEF_TUNE (X86_TUNE_PUSH_MEMORY, "push_memory",
|
||||
m_386 | m_P4_NOCONA | m_CORE_ALL | m_K6_GEODE | m_AMD_MULTIPLE
|
||||
| m_LUJIAZUI | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
|
||||
| m_ZHAOXIN | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_SINGLE_PUSH: Enable if single push insn is preferred
|
||||
over esp subtraction. */
|
||||
@ -239,7 +239,7 @@ DEF_TUNE (X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL, "software_prefetching_benefi
|
||||
|
||||
/* X86_TUNE_LCP_STALL: Avoid an expensive length-changing prefix stall
|
||||
on 16-bit immediate moves into memory on Core2 and Corei7. */
|
||||
DEF_TUNE (X86_TUNE_LCP_STALL, "lcp_stall", m_CORE_ALL | m_LUJIAZUI | m_GENERIC)
|
||||
DEF_TUNE (X86_TUNE_LCP_STALL, "lcp_stall", m_CORE_ALL | m_ZHAOXIN | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_READ_MODIFY: Enable use of read-modify instructions such
|
||||
as "add mem, reg". */
|
||||
@ -255,20 +255,20 @@ DEF_TUNE (X86_TUNE_USE_INCDEC, "use_incdec",
|
||||
~(m_P4_NOCONA | m_CORE2 | m_NEHALEM | m_SANDYBRIDGE
|
||||
| m_BONNELL | m_SILVERMONT | m_INTEL | m_KNL | m_KNM | m_GOLDMONT
|
||||
| m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM
|
||||
| m_LUJIAZUI | m_GENERIC))
|
||||
| m_ZHAOXIN | m_GENERIC))
|
||||
|
||||
/* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
|
||||
for DFmode copies */
|
||||
DEF_TUNE (X86_TUNE_INTEGER_DFMODE_MOVES, "integer_dfmode_moves",
|
||||
~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
|
||||
| m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_LUJIAZUI
|
||||
| m_KNL | m_KNM | m_INTEL | m_GEODE | m_AMD_MULTIPLE | m_ZHAOXIN
|
||||
| m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID
|
||||
| m_CORE_ATOM | m_GENERIC))
|
||||
|
||||
/* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
|
||||
will impact LEA instruction selection. */
|
||||
DEF_TUNE (X86_TUNE_OPT_AGU, "opt_agu", m_BONNELL | m_SILVERMONT | m_KNL
|
||||
| m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL | m_LUJIAZUI)
|
||||
| m_KNM | m_GOLDMONT | m_GOLDMONT_PLUS | m_INTEL | m_ZHAOXIN)
|
||||
|
||||
/* X86_TUNE_AVOID_LEA_FOR_ADDR: Avoid lea for address computation. */
|
||||
DEF_TUNE (X86_TUNE_AVOID_LEA_FOR_ADDR, "avoid_lea_for_addr",
|
||||
@ -302,7 +302,7 @@ DEF_TUNE (X86_TUNE_SINGLE_STRINGOP, "single_stringop", m_386 | m_P4_NOCONA)
|
||||
DEF_TUNE (X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB,
|
||||
"prefer_known_rep_movsb_stosb",
|
||||
m_SKYLAKE | m_CORE_HYBRID | m_CORE_ATOM | m_TREMONT | m_CORE_AVX512
|
||||
| m_LUJIAZUI)
|
||||
| m_ZHAOXIN)
|
||||
|
||||
/* X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES: Enable generation of
|
||||
compact prologues and epilogues by issuing a misaligned moves. This
|
||||
@ -311,14 +311,14 @@ DEF_TUNE (X86_TUNE_PREFER_KNOWN_REP_MOVSB_STOSB,
|
||||
FIXME: This may actually be a win on more targets than listed here. */
|
||||
DEF_TUNE (X86_TUNE_MISALIGNED_MOVE_STRING_PRO_EPILOGUES,
|
||||
"misaligned_move_string_pro_epilogues",
|
||||
m_386 | m_486 | m_CORE_ALL | m_AMD_MULTIPLE | m_LUJIAZUI | m_TREMONT
|
||||
m_386 | m_486 | m_CORE_ALL | m_AMD_MULTIPLE | m_ZHAOXIN | m_TREMONT
|
||||
| m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_USE_SAHF: Controls use of SAHF. */
|
||||
DEF_TUNE (X86_TUNE_USE_SAHF, "use_sahf",
|
||||
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
|
||||
| m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER
|
||||
| m_BTVER | m_ZNVER | m_LUJIAZUI | m_GOLDMONT | m_GOLDMONT_PLUS
|
||||
| m_BTVER | m_ZNVER | m_ZHAOXIN | m_GOLDMONT | m_GOLDMONT_PLUS
|
||||
| m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_USE_CLTD: Controls use of CLTD and CQTO instructions. */
|
||||
@ -329,7 +329,7 @@ DEF_TUNE (X86_TUNE_USE_CLTD, "use_cltd",
|
||||
/* X86_TUNE_USE_BT: Enable use of BT (bit test) instructions. */
|
||||
DEF_TUNE (X86_TUNE_USE_BT, "use_bt",
|
||||
m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
|
||||
| m_LAKEMONT | m_AMD_MULTIPLE | m_LUJIAZUI | m_GOLDMONT
|
||||
| m_LAKEMONT | m_AMD_MULTIPLE | m_ZHAOXIN | m_GOLDMONT
|
||||
| m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM
|
||||
| m_GENERIC)
|
||||
|
||||
@ -338,7 +338,7 @@ DEF_TUNE (X86_TUNE_USE_BT, "use_bt",
|
||||
DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi",
|
||||
m_SANDYBRIDGE | m_HASWELL | m_SKYLAKE | m_SKYLAKE_AVX512
|
||||
| m_CANNONLAKE | m_CASCADELAKE | m_COOPERLAKE
|
||||
| m_LUJIAZUI | m_GENERIC)
|
||||
| m_ZHAOXIN | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_ADJUST_UNROLL: This enables adjusting the unroll factor based
|
||||
on hardware capabilities. Bdver3 hardware has a loop buffer which makes
|
||||
@ -351,11 +351,11 @@ DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4)
|
||||
DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn",
|
||||
m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GOLDMONT
|
||||
| m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM
|
||||
| m_LUJIAZUI | m_GENERIC)
|
||||
| m_ZHAOXIN | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_AVOID_MFENCE: Use lock prefixed instructions instead of mfence. */
|
||||
DEF_TUNE (X86_TUNE_AVOID_MFENCE, "avoid_mfence",
|
||||
m_CORE_ALL | m_BDVER | m_ZNVER | m_LUJIAZUI | m_TREMONT | m_CORE_HYBRID
|
||||
m_CORE_ALL | m_BDVER | m_ZNVER | m_ZHAOXIN | m_TREMONT | m_CORE_HYBRID
|
||||
| m_CORE_ATOM | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_EXPAND_ABS: This enables a new abs pattern by
|
||||
@ -363,7 +363,7 @@ DEF_TUNE (X86_TUNE_AVOID_MFENCE, "avoid_mfence",
|
||||
(signed) x >> (W-1)) instead of cmove or SSE max/abs instructions. */
|
||||
DEF_TUNE (X86_TUNE_EXPAND_ABS, "expand_abs",
|
||||
m_CORE_ALL | m_SILVERMONT | m_KNL | m_KNM | m_GOLDMONT
|
||||
| m_GOLDMONT_PLUS | m_LUJIAZUI)
|
||||
| m_GOLDMONT_PLUS | m_ZHAOXIN)
|
||||
|
||||
/*****************************************************************************/
|
||||
/* 387 instruction selection tuning */
|
||||
@ -380,16 +380,16 @@ DEF_TUNE (X86_TUNE_USE_HIMODE_FIOP, "use_himode_fiop",
|
||||
DEF_TUNE (X86_TUNE_USE_SIMODE_FIOP, "use_simode_fiop",
|
||||
~(m_PENT | m_LAKEMONT | m_PPRO | m_CORE_ALL | m_BONNELL
|
||||
| m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_AMD_MULTIPLE
|
||||
| m_LUJIAZUI | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT
|
||||
| m_ZHAOXIN | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT
|
||||
| m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC))
|
||||
|
||||
/* X86_TUNE_USE_FFREEP: Use ffreep instruction instead of fstp. */
|
||||
DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE | m_LUJIAZUI)
|
||||
DEF_TUNE (X86_TUNE_USE_FFREEP, "use_ffreep", m_AMD_MULTIPLE | m_ZHAOXIN)
|
||||
|
||||
/* X86_TUNE_EXT_80387_CONSTANTS: Use fancy 80387 constants, such as PI. */
|
||||
DEF_TUNE (X86_TUNE_EXT_80387_CONSTANTS, "ext_80387_constants",
|
||||
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BONNELL | m_SILVERMONT
|
||||
| m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_LUJIAZUI
|
||||
| m_KNL | m_KNM | m_INTEL | m_K6_GEODE | m_ATHLON_K8 | m_ZHAOXIN
|
||||
| m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID
|
||||
| m_CORE_ATOM | m_GENERIC)
|
||||
|
||||
@ -407,7 +407,7 @@ DEF_TUNE (X86_TUNE_GENERAL_REGS_SSE_SPILL, "general_regs_sse_spill",
|
||||
DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal",
|
||||
m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM
|
||||
| m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID
|
||||
| m_CORE_ATOM | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_LUJIAZUI
|
||||
| m_CORE_ATOM | m_AMDFAM10 | m_BDVER | m_BTVER | m_ZNVER | m_ZHAOXIN
|
||||
| m_GENERIC)
|
||||
|
||||
/* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL: Use movups for misaligned stores
|
||||
@ -415,7 +415,7 @@ DEF_TUNE (X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL, "sse_unaligned_load_optimal",
|
||||
DEF_TUNE (X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL, "sse_unaligned_store_optimal",
|
||||
m_NEHALEM | m_SANDYBRIDGE | m_CORE_AVX2 | m_SILVERMONT | m_KNL | m_KNM
|
||||
| m_INTEL | m_GOLDMONT | m_GOLDMONT_PLUS | m_TREMONT | m_CORE_HYBRID
|
||||
| m_CORE_ATOM | m_BDVER | m_ZNVER | m_LUJIAZUI | m_GENERIC)
|
||||
| m_CORE_ATOM | m_BDVER | m_ZNVER | m_ZHAOXIN | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL: Use packed single
|
||||
precision 128bit instructions instead of double where possible. */
|
||||
@ -424,14 +424,14 @@ DEF_TUNE (X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL, "sse_packed_single_insn_optim
|
||||
|
||||
/* X86_TUNE_SSE_TYPELESS_STORES: Always movaps/movups for 128bit stores. */
|
||||
DEF_TUNE (X86_TUNE_SSE_TYPELESS_STORES, "sse_typeless_stores",
|
||||
m_AMD_MULTIPLE | m_LUJIAZUI | m_CORE_ALL | m_TREMONT | m_CORE_HYBRID
|
||||
m_AMD_MULTIPLE | m_ZHAOXIN | m_CORE_ALL | m_TREMONT | m_CORE_HYBRID
|
||||
| m_CORE_ATOM | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_SSE_LOAD0_BY_PXOR: Always use pxor to load0 as opposed to
|
||||
xorps/xorpd and other variants. */
|
||||
DEF_TUNE (X86_TUNE_SSE_LOAD0_BY_PXOR, "sse_load0_by_pxor",
|
||||
m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_BDVER | m_BTVER | m_ZNVER
|
||||
| m_LUJIAZUI | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
|
||||
| m_ZHAOXIN | m_TREMONT | m_CORE_HYBRID | m_CORE_ATOM | m_GENERIC)
|
||||
|
||||
/* X86_TUNE_INTER_UNIT_MOVES_TO_VEC: Enable moves in from integer
|
||||
to SSE registers. If disabled, the moves will be done by storing
|
||||
@ -484,7 +484,7 @@ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes",
|
||||
elements. */
|
||||
DEF_TUNE (X86_TUNE_USE_GATHER_2PARTS, "use_gather_2parts",
|
||||
~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_CORE_HYBRID
|
||||
| m_CORE_ATOM | m_GENERIC | m_GDS))
|
||||
| m_YONGFENG | m_CORE_ATOM | m_GENERIC | m_GDS))
|
||||
|
||||
/* X86_TUNE_USE_SCATTER_2PARTS: Use scatter instructions for vectors with 2
|
||||
elements. */
|
||||
@ -495,7 +495,7 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_2PARTS, "use_scatter_2parts",
|
||||
elements. */
|
||||
DEF_TUNE (X86_TUNE_USE_GATHER_4PARTS, "use_gather_4parts",
|
||||
~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_CORE_HYBRID
|
||||
| m_CORE_ATOM | m_GENERIC | m_GDS))
|
||||
| m_YONGFENG | m_CORE_ATOM | m_GENERIC | m_GDS))
|
||||
|
||||
/* X86_TUNE_USE_SCATTER_4PARTS: Use scatter instructions for vectors with 4
|
||||
elements. */
|
||||
@ -506,7 +506,7 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts",
|
||||
elements. */
|
||||
DEF_TUNE (X86_TUNE_USE_GATHER_8PARTS, "use_gather_8parts",
|
||||
~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_CORE_HYBRID | m_CORE_ATOM
|
||||
| m_GENERIC | m_GDS))
|
||||
| m_YONGFENG | m_GENERIC | m_GDS))
|
||||
|
||||
/* X86_TUNE_USE_SCATTER: Use scatter instructions for vectors with 8 or more
|
||||
elements. */
|
||||
@ -515,7 +515,8 @@ DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts",
|
||||
|
||||
/* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
|
||||
smaller FMA chain. */
|
||||
DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2 | m_ZNVER3)
|
||||
DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2 | m_ZNVER3
|
||||
| m_YONGFENG)
|
||||
|
||||
/* X86_TUNE_AVOID_256FMA_CHAINS: Avoid creating loops with tight 256bit or
|
||||
smaller FMA chain. */
|
||||
|
848
gcc/config/i386/yongfeng.md
Normal file
848
gcc/config/i386/yongfeng.md
Normal file
@ -0,0 +1,848 @@
|
||||
;; Copyright (C) 2012-2023 Free Software Foundation, Inc.
|
||||
;;
|
||||
;; This file is part of GCC.
|
||||
;;
|
||||
;; GCC is free software; you can redistribute it and/or modify
|
||||
;; it under the terms of the GNU General Public License as published by
|
||||
;; the Free Software Foundation; either version 3, or (at your option)
|
||||
;; any later version.
|
||||
;;
|
||||
;; GCC is distributed in the hope that it will be useful,
|
||||
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
;; GNU General Public License for more details.
|
||||
;;
|
||||
;; You should have received a copy of the GNU General Public License
|
||||
;; along with GCC; see the file COPYING3. If not see
|
||||
;; <http://www.gnu.org/licenses/>.
|
||||
;;
|
||||
|
||||
;; ZHAOXIN yongfeng processor Scheduling
|
||||
;; Modeling automatons for yongfeng decoders, integer execution pipes,
|
||||
;; FP execution pipes, AGU pipes, and dividers.
|
||||
(define_automaton "yongfeng_decoder,yongfeng_ieu,yongfeng_fp,yongfeng_agu,yongfeng_idiv,yongfeng_fdiv")
|
||||
|
||||
;; The rules for the decoder are simple:
|
||||
;; - an instruction with 1 uop can be decoded by any of the four
|
||||
;; decoders in one cycle.
|
||||
;; - an instruction with 2 uops can be decoded by decoder 0 or decoder 1
|
||||
;; or decoder 2 but still in only one cycle.
|
||||
;; - a complex (microcode) instruction can only be decoded by
|
||||
;; decoder 0, and this takes an unspecified number of cycles.
|
||||
;;
|
||||
;; The goal is to schedule such that we have a few-one-two uops sequence
|
||||
;; in each cycle, to decode as many instructions per cycle as possible.
|
||||
(define_cpu_unit "yf_decoder0" "yongfeng_decoder")
|
||||
(define_cpu_unit "yf_decoder1" "yongfeng_decoder")
|
||||
(define_cpu_unit "yf_decoder2" "yongfeng_decoder")
|
||||
(define_cpu_unit "yf_decoder3" "yongfeng_decoder")
|
||||
|
||||
;; We first wish to find an instruction for yf_decoder0, so exclude
|
||||
;; other decoders from being reserved until yf_decoder0 is
|
||||
;; reserved.
|
||||
(presence_set "yf_decoder1" "yf_decoder0")
|
||||
(presence_set "yf_decoder2" "yf_decoder0")
|
||||
(presence_set "yf_decoder3" "yf_decoder0")
|
||||
|
||||
;; Most instructions can be decoded on any of the four decoders.
|
||||
(define_reservation "yf_decodern" "yf_decoder0|yf_decoder1|yf_decoder2|yf_decoder3")
|
||||
(define_reservation "yf_decoder012" "yf_decoder0|yf_decoder1|yf_decoder2")
|
||||
|
||||
;; The out-of-order core has ten pipelines. Port 0,1,2,3 are integer execution
|
||||
;; pipelines, port 4, 5 are responsible for address calculation, load and store,
|
||||
;; port 6,7,8,9 are FP pipelines.
|
||||
(define_cpu_unit "yf_p0,yf_p1,yf_p2,yf_p3" "yongfeng_ieu")
|
||||
(define_cpu_unit "yf_p4,yf_p5" "yongfeng_agu")
|
||||
(define_cpu_unit "yf_p6,yf_p7,yf_p8,yf_p9" "yongfeng_fp")
|
||||
|
||||
(define_cpu_unit "yf_idiv" "yongfeng_idiv")
|
||||
(define_cpu_unit "yf_fdiv" "yongfeng_fdiv")
|
||||
|
||||
(define_reservation "yf_ieu" "yf_p0|yf_p1|yf_p2|yf_p3")
|
||||
(define_reservation "yf_p01" "yf_p0|yf_p1")
|
||||
(define_reservation "yf_agu" "yf_p4|yf_p5")
|
||||
(define_reservation "yf_feu" "yf_p6|yf_p7|yf_p8|yf_p9")
|
||||
|
||||
;; Only the irregular instructions have to be modeled here.
|
||||
|
||||
;; Complex instruction.
|
||||
(define_insn_reservation "yongfeng_complex_insn" 6
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(eq_attr "type" "other,multi,str"))
|
||||
"yf_decoder0")
|
||||
|
||||
;; Call instruction.
|
||||
(define_insn_reservation "yongfeng_call" 3
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(eq_attr "type" "call,callv"))
|
||||
"yf_decoder012,yf_agu,yf_ieu*3")
|
||||
;; Push and pop.
|
||||
(define_insn_reservation "yongfeng_push_reg" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "store")
|
||||
(eq_attr "type" "push")))
|
||||
"yf_decodern,yf_agu,yf_ieu")
|
||||
|
||||
(define_insn_reservation "yongfeng_push_mem" 4
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "both")
|
||||
(eq_attr "type" "push")))
|
||||
"yf_decoder012,yf_agu,yf_ieu")
|
||||
|
||||
(define_insn_reservation "yongfeng_pop_reg" 4
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "pop")))
|
||||
"yf_decoder012,yf_p01,yf_agu")
|
||||
|
||||
(define_insn_reservation "yongfeng_pop_mem" 4
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "both")
|
||||
(eq_attr "type" "pop")))
|
||||
"yf_decoder0,yf_agu,yf_ieu")
|
||||
|
||||
(define_insn_reservation "yongfeng_leave" 3
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(eq_attr "type" "leave"))
|
||||
"yf_decoder0,yf_agu,yf_p01*3")
|
||||
|
||||
;; MOV - integer moves.
|
||||
(define_insn_reservation "yongfeng_imov" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "imov,imovx")))
|
||||
"yf_decodern,yf_ieu")
|
||||
|
||||
(define_insn_reservation "yongfeng_imov_load" 4
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "imov")))
|
||||
"yf_decodern,yf_agu")
|
||||
|
||||
(define_insn_reservation "yongfeng_imovx_load" 4
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "imovx")))
|
||||
"yf_decoder012,yf_agu,yf_ieu|yf_ieu")
|
||||
|
||||
(define_insn_reservation "yongfeng_imov_store" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "store")
|
||||
(eq_attr "type" "imov")))
|
||||
"yf_decodern,yf_agu,yf_ieu")
|
||||
|
||||
(define_insn_reservation "yongfeng_int_insn" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none,unknown")
|
||||
(eq_attr "type" "alu,alu1,icmov,icmp,test,lea,ishift1,rotate,rotate1,setcc,incdec")))
|
||||
"yf_decodern,yf_ieu")
|
||||
|
||||
(define_insn_reservation "yongfeng_int_insn_load" 5
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "alu,alu1,icmov,icmp,test,ishift1,rotate,rotate1,setcc")))
|
||||
"yf_decoder012,yf_agu,yf_ieu")
|
||||
|
||||
(define_insn_reservation "yongfeng_int_insn_store" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "store")
|
||||
(eq_attr "type" "alu,alu1,icmp,test,ishift1,rotate,rotate1,setcc")))
|
||||
"yf_decoder012,yf_agu,yf_ieu")
|
||||
|
||||
(define_insn_reservation "yongfeng_int_insn_both" 5
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "both")
|
||||
(eq_attr "type" "alu,alu1,icmp,test,ishift1,rotate,rotate1,setcc,incdec")))
|
||||
"yf_decoder012,yf_agu,yf_ieu")
|
||||
|
||||
(define_insn_reservation "yongfeng_shift_HI" 5
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none,unknown")
|
||||
(and (eq_attr "mode" "HI")
|
||||
(eq_attr "type" "ishift"))))
|
||||
"yf_decoder0,yf_ieu")
|
||||
|
||||
(define_insn_reservation "yongfeng_shift_SIDI" 2
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none,unknown")
|
||||
(and (eq_attr "mode" "SI,DI")
|
||||
(eq_attr "type" "ishift"))))
|
||||
"yf_decoder0,yf_ieu")
|
||||
|
||||
(define_insn_reservation "yongfeng_shift_HI_mem" 9
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "!none")
|
||||
(and (eq_attr "mode" "HI")
|
||||
(eq_attr "type" "ishift"))))
|
||||
"yf_decoder0,yf_agu,yf_ieu")
|
||||
|
||||
(define_insn_reservation "yongfeng_shift_SIDI_mem" 6
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "!none")
|
||||
(and (eq_attr "mode" "SI,DI")
|
||||
(eq_attr "type" "ishift"))))
|
||||
"yf_decoder0,yf_agu,yf_ieu")
|
||||
|
||||
(define_insn_reservation "yongfeng_negnot_QIHI" 2
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none,unknown")
|
||||
(and (eq_attr "mode" "QI,HI")
|
||||
(eq_attr "type" "negnot"))))
|
||||
"yf_decoder012,yf_ieu|yf_ieu")
|
||||
|
||||
(define_insn_reservation "yongfeng_negnot_SIDI" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none,unknown")
|
||||
(and (eq_attr "mode" "SI,DI")
|
||||
(eq_attr "type" "negnot"))))
|
||||
"yf_decodern,yf_ieu")
|
||||
|
||||
(define_insn_reservation "yongfeng_negnot_QIHI_mem" 6
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "!none")
|
||||
(and (eq_attr "mode" "QI,HI")
|
||||
(eq_attr "type" "negnot"))))
|
||||
"yf_decoder012,yf_agu,yf_ieu")
|
||||
|
||||
(define_insn_reservation "yongfeng_negnot_SIDI_mem" 5
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "!none")
|
||||
(and (eq_attr "mode" "SI,DI")
|
||||
(eq_attr "type" "negnot"))))
|
||||
"yf_decoder012,yf_agu,yf_ieu")
|
||||
|
||||
;; branch instruction
|
||||
(define_insn_reservation "yongfeng_branch" 3
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "ibr")))
|
||||
"yf_decodern,yf_p2*3")
|
||||
|
||||
(define_insn_reservation "yongfeng_branch_mem" 7
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "!none")
|
||||
(eq_attr "type" "ibr")))
|
||||
"yf_decodern,yf_agu,yf_p2")
|
||||
|
||||
;; Integer Multiplication instructions.
|
||||
|
||||
(define_insn_reservation "yongfeng_imul_QI" 2
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "QI")
|
||||
(eq_attr "type" "imul"))))
|
||||
"yf_decodern,yf_ieu|yf_ieu")
|
||||
|
||||
(define_insn_reservation "yongfeng_imul_HI" 3
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "HI")
|
||||
(eq_attr "type" "imul"))))
|
||||
"yf_decoder0,yf_ieu")
|
||||
|
||||
(define_insn_reservation "yongfeng_imul_SIDI" 2
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "SI,DI")
|
||||
(eq_attr "type" "imul"))))
|
||||
"yf_decoder0,yf_ieu|yf_ieu")
|
||||
|
||||
(define_insn_reservation "yongfeng_imul_QI_mem" 6
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "!none")
|
||||
(and (eq_attr "mode" "QI")
|
||||
(eq_attr "type" "imul"))))
|
||||
"yf_decoder012,yf_agu,yf_ieu")
|
||||
|
||||
(define_insn_reservation "yongfeng_imul_SIDI_mem" 6
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "!none")
|
||||
(and (eq_attr "mode" "SI,DI")
|
||||
(eq_attr "type" "imul"))))
|
||||
"yf_decoder0,yf_agu,yf_ieu")
|
||||
|
||||
(define_insn_reservation "yongfeng_imul_HI_mem" 7
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "!none")
|
||||
(and (eq_attr "mode" "HI")
|
||||
(eq_attr "type" "imul"))))
|
||||
"yf_decoder0,yf_agu,yf_ieu")
|
||||
|
||||
;; Integer Division instructions.
|
||||
|
||||
(define_insn_reservation "yongfeng_idiv_DI" 41
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "DI")
|
||||
(eq_attr "type" "idiv"))))
|
||||
"yf_decoder0,yf_ieu,yf_feu,yf_idiv*41")
|
||||
|
||||
(define_insn_reservation "yongfeng_idiv_HI" 9
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "HI")
|
||||
(eq_attr "type" "idiv"))))
|
||||
"yf_decoder0,yf_ieu,yf_feu,yf_idiv*3")
|
||||
|
||||
(define_insn_reservation "yongfeng_idiv_QISI" 8
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none")
|
||||
(and (eq_attr "mode" "QI,SI")
|
||||
(eq_attr "type" "idiv"))))
|
||||
"yf_decoder0,yf_ieu,yf_feu,yf_idiv*3")
|
||||
|
||||
|
||||
(define_insn_reservation "yongfeng_idiv_mem_DI" 45
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "load")
|
||||
(and (eq_attr "mode" "DI")
|
||||
(eq_attr "type" "idiv"))))
|
||||
"yf_decoder0,yf_agu,yf_ieu,yf_feu,yf_idiv*41")
|
||||
|
||||
(define_insn_reservation "yongfeng_idiv_HI_mem" 13
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "load")
|
||||
(and (eq_attr "mode" "HI")
|
||||
(eq_attr "type" "idiv"))))
|
||||
"yf_decoder0,yf_agu,yf_ieu,yf_feu,yf_idiv*3")
|
||||
|
||||
|
||||
(define_insn_reservation "yongfeng_idiv_QISI_mem" 12
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "load")
|
||||
(and (eq_attr "mode" "QI,SI")
|
||||
(eq_attr "type" "idiv"))))
|
||||
"yf_decoder0,yf_agu,yf_ieu,yf_feu,yf_idiv*3")
|
||||
|
||||
;; MMX,SSE,AVX,AVX2 instructions
|
||||
;; sse moves
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_mov" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "ssemov"))))
|
||||
"yf_decodern,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_mov_store" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")
|
||||
(and (eq_attr "memory" "store")
|
||||
(eq_attr "type" "ssemov"))))
|
||||
"yf_decodern,yf_agu,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_mov_load" 5
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "ssemov"))))
|
||||
"yf_decodern,yf_agu,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_avx256_mov" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "V8SF,V4DF,OI")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "ssemov"))))
|
||||
"yf_decoder012,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_avx256_mov_store" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "V8SF,V4DF,OI")
|
||||
(and (eq_attr "memory" "store")
|
||||
(eq_attr "type" "ssemov"))))
|
||||
"yf_decoder012,yf_agu,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_avx256_mov_load" 6
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "V8SF,V4DF,OI")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "ssemov"))))
|
||||
"yf_decoder012,yf_agu,yf_feu")
|
||||
|
||||
;;sse general instructions
|
||||
(define_insn_reservation "yongfeng_sse_insns" 3
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "sseadd,sseadd1,ssemul,ssecmp"))))
|
||||
"yf_decodern,yf_feu|yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_insns_load" 7
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "SF,DF,V4SF,V2DF,TI")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "sseadd,sseadd1,ssemul,ssecmp"))))
|
||||
"yf_decodern,yf_agu,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_avx256_insns" 3
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "V8SF,V4DF,OI")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "sseadd,sseadd1,ssemul,ssecmp"))))
|
||||
"yf_decoder012,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_avx256_insns_load" 8
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "V8SF,V4DF,OI")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "sseadd,sseadd1,ssemul,ssecmp"))))
|
||||
"yf_decoder012,yf_agu,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_iadd" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "DI,TI")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "sseiadd"))))
|
||||
"yf_decodern,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_iadd_load" 5
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "DI,TI")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "sseiadd"))))
|
||||
"yf_decodern,yf_agu,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_avx256_iadd" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "OI")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "sseiadd"))))
|
||||
"yf_decoder012,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_avx256_iadd_load" 6
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "OI")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "sseiadd"))))
|
||||
"yf_decoder0,yf_agu,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_iadd1" 2
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "sseiadd1")))
|
||||
"yf_decoder0,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_iadd1_load" 6
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "sseiadd1")))
|
||||
"yf_decoder0,yf_agu,yf_feu")
|
||||
|
||||
;;sse imul
|
||||
(define_insn_reservation "yongfeng_sse_imul" 2
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "DI,TI")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "sseimul"))))
|
||||
"yf_decodern,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_imul_load" 6
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "DI,TI")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "sseimul"))))
|
||||
"yf_decoder012,yf_agu,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_avx256_imul" 2
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "OI")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "sseimul"))))
|
||||
"yf_decoder012,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_avx256_imul_load" 7
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "OI")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "sseimul"))))
|
||||
"yf_decoder0,yf_agu,yf_feu")
|
||||
|
||||
;; sse FMA
|
||||
(define_insn_reservation "yongfeng_sse_fma" 5
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "SF,DF,V4SF,V2DF")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "ssemuladd"))))
|
||||
"yf_decodern,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_fma_load" 9
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "SF,DF,V4SF,V2DF")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "ssemuladd"))))
|
||||
"yf_decoder012,yf_agu,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_avx256_fma" 5
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "V8SF,V4DF")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "ssemuladd"))))
|
||||
"yf_decoder012,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_avx256_fma_load" 10
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "V8SF,V4DF")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "ssemuladd"))))
|
||||
"yf_decoder0,yf_agu,yf_feu")
|
||||
;; sse div
|
||||
(define_insn_reservation "yongfeng_ssediv_s" 10
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "SF,V4SF")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "ssediv"))))
|
||||
"yf_decodern,yf_fdiv*2")
|
||||
|
||||
(define_insn_reservation "yongfeng_ssediv_s_load" 14
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "SF,V4SF")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "ssediv"))))
|
||||
"yf_decodern,yf_agu,yf_fdiv*2")
|
||||
|
||||
(define_insn_reservation "yongfeng_ssediv_d" 14
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "DF,V2DF")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "ssediv"))))
|
||||
"yf_decodern,yf_fdiv*3")
|
||||
|
||||
(define_insn_reservation "yongfeng_ssediv_d_load" 18
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "DF,V2DF")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "ssediv"))))
|
||||
"yf_decodern,yf_agu,yf_fdiv*3")
|
||||
|
||||
(define_insn_reservation "yongfeng_ssediv_avx256_s" 10
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "V8SF")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "ssediv"))))
|
||||
"yf_decoder012,yf_fdiv*10")
|
||||
|
||||
;; V8SF divide whose source operand is loaded from memory.
;; The "memory" test has to be "load": with "none" this entry repeated the
;; condition of yongfeng_ssediv_avx256_s and left V8SF divides that read
;; memory without any matching reservation.
(define_insn_reservation "yongfeng_ssediv_avx256_s_load" 15
			 (and (eq_attr "cpu" "yongfeng")
			      (and (eq_attr "mode" "V8SF")
				   (and (eq_attr "memory" "load")
					(eq_attr "type" "ssediv"))))
			 "yf_decoder012,yf_agu,yf_fdiv*10")
|
||||
|
||||
(define_insn_reservation "yongfeng_ssediv_avx256_d" 14
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "V4DF")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "ssediv"))))
|
||||
"yf_decoder012,yf_fdiv*14")
|
||||
|
||||
;; V4DF divide whose source operand is loaded from memory.
;; Like every other *_load reservation in this file, the load reserves an
;; AGU port (yf_agu) before the divider is occupied.
(define_insn_reservation "yongfeng_ssediv_avx256_d_load" 19
			 (and (eq_attr "cpu" "yongfeng")
			      (and (eq_attr "mode" "V4DF")
				   (and (eq_attr "memory" "load")
					(eq_attr "type" "ssediv"))))
			 "yf_decoder012,yf_agu,yf_fdiv*14")
|
||||
|
||||
;;sse logical and shuffle instructions
|
||||
(define_insn_reservation "yongfeng_avx256_log_shuf" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "V8SF,V4DF,OI")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1"))))
|
||||
"yf_decoder012,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_avx256_log_shuf_load" 6
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "V8SF,V4DF,OI")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1"))))
|
||||
"yf_decoder012,yf_agu,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_log_shuf" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")))
|
||||
"yf_decodern,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_log_shuf_load" 5
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "sselog,sselog1,sseshuf,sseshuf1")))
|
||||
"yf_decodern,yf_agu,yf_feu")
|
||||
;;sse shift
|
||||
|
||||
(define_insn_reservation "yongfeng_avx256_shift" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "V8SF,V4DF,OI")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "sseishft,sseishft1"))))
|
||||
"yf_decoder012,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_avx256_shift_load" 6
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "V8SF,V4DF,OI")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "sseishft,sseishft1"))))
|
||||
"yf_decoder0,yf_agu,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_shift" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "sseishft,sseishft1")))
|
||||
"yf_decodern,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_shift_load" 5
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "sseishft,sseishft1")))
|
||||
"yf_decodern,yf_agu,yf_feu")
|
||||
;;sse comi
|
||||
(define_insn_reservation "yongfeng_avx256_test" 4
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "V8SF,V4DF,OI")
|
||||
(and (eq_attr "prefix_extra" "1")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "ssecomi")))))
|
||||
"yf_decoder012,yf_ieu*3")
|
||||
|
||||
(define_insn_reservation "yongfeng_avx256_test_load" 9
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "V8SF,V4DF,OI")
|
||||
(and (eq_attr "prefix_extra" "1")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "ssecomi")))))
|
||||
"yf_decoder012,yf_agu,yf_ieu,yf_p6*3")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_test" 3
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "prefix_extra" "1")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "ssecomi"))))
|
||||
"yf_decodern,yf_feu|yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_test_load" 7
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "prefix_extra" "1")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "ssecomi"))))
|
||||
"yf_decodern,yf_agu,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_comi" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "prefix_extra" "0")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "ssecomi"))))
|
||||
"yf_decodern,yf_feu|yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_comi_load" 4
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "prefix_extra" "0")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "ssecomi"))))
|
||||
"yf_decodern,yf_agu,yf_feu")
|
||||
|
||||
;;sse conversion
|
||||
(define_insn_reservation "yongfeng_avx_cvt_ps" 4
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "V4SF")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "ssecvt"))))
|
||||
"yf_decoder0,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_avx_cvt_ps_load" 8
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "V4SF")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "ssecvt"))))
|
||||
"yf_decoder0,yf_agu,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_avx_cvt_pd" 3
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "V4DF")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "ssecvt"))))
|
||||
"yf_decoder0,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_avx_cvt_pd_load" 7
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "V4DF")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "ssecvt"))))
|
||||
"yf_decoder0,yf_agu,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_cvt" 3
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "ssecvt")))
|
||||
"yf_decodern,yf_feu|yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_cvt_load" 7
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "ssecvt")))
|
||||
"yf_decoder012,yf_agu,yf_feu")
|
||||
|
||||
;; Insns of type "sseicvt" with no memory operand: latency 3, any decoder,
;; then one FP port.
;; NOTE(review): this condition puts no constraint on "mode", so it also
;; covers the SI and DI cases tested by yongfeng_sse_icvt_SI and
;; yongfeng_sse_icvt_DI further down.  If the first matching reservation
;; wins, those two entries never apply -- confirm, and reorder if so.
(define_insn_reservation "yongfeng_sse_icvt" 3
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "sseicvt")))
|
||||
"yf_decodern,yf_feu|yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_icvt_load" 7
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "sseicvt")))
|
||||
"yf_decoder012,yf_agu,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_icvt_SI" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "SI")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "sseicvt"))))
|
||||
"yf_decoder012,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_icvt_SI_load" 5
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "SI")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "sseicvt"))))
|
||||
"yf_decoder012,yf_agu,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_icvt_DI" 2
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "DI")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "sseicvt"))))
|
||||
"yf_decoder0,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_sse_icvt_DI_load" 6
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "mode" "DI")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "sseicvt"))))
|
||||
"yf_decoder0,yf_agu,yf_feu")
|
||||
;; MMX
|
||||
(define_insn_reservation "yongfeng_mmx_move" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "mmxmov")))
|
||||
"yf_decodern,yf_p0")
|
||||
|
||||
(define_insn_reservation "yongfeng_mmx_move_load" 5
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "mmxmov")))
|
||||
"yf_decodern,yf_agu,yf_p0")
|
||||
|
||||
(define_insn_reservation "yongfeng_mmx_move_store" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "store")
|
||||
(eq_attr "type" "mmxmov")))
|
||||
"yf_decodern,yf_agu,yf_p0")
|
||||
|
||||
(define_insn_reservation "yongfeng_mmx_mul" 2
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "mmxmul")))
|
||||
"yf_decodern,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_mmx_mul_load" 6
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "mmxmul")))
|
||||
"yf_decoder012,yf_agu,yf_feu")
|
||||
|
||||
;; MMX general instructions
|
||||
(define_insn_reservation "yongfeng_mmx_insns" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "mmxadd,mmxshft,mmxcmp,mmx,mmxcvt")))
|
||||
"yf_decodern,yf_feu|yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_mmx_insns_load" 5
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "mmxadd,mmxshft,mmxcmp,mmx,mmxcvt")))
|
||||
"yf_decodern,yf_agu,yf_feu|yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_mmx_insns_store" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "store")
|
||||
(eq_attr "type" "mmxadd,mmxshft,mmxcmp,mmx,mmxcvt")))
|
||||
"yf_decodern,yf_agu,yf_feu")
|
||||
|
||||
;; x87 floating point operations.
|
||||
|
||||
(define_insn_reservation "yongfeng_fxch" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(eq_attr "type" "fxch"))
|
||||
"yf_decodern,yf_p0|yf_p1")
|
||||
|
||||
(define_insn_reservation "yongfeng_fcmov_sgn" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(eq_attr "type" "fcmov,fsgn"))
|
||||
"yf_decodern,yf_p0|yf_p1,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_fcmp" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "fcmp")))
|
||||
"yf_decodern,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_fcmp_load" 5
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "fcmp")))
|
||||
"yf_decodern,yf_agu,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_fmov" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "fmov")))
|
||||
"yf_decodern,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_fmov_store" 1
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "store")
|
||||
(eq_attr "type" "fmov")))
|
||||
"yf_decoder0,yf_agu,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_fmov_load" 5
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "fmov")))
|
||||
"yf_decoder0,yf_agu,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_fistp" 5
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(eq_attr "type" "fistp,fisttp"))
|
||||
"yf_decoder012,yf_agu,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_fop_mul" 3
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none,unknown")
|
||||
(eq_attr "type" "fop,fmul")))
|
||||
"yf_decodern,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_fop_mul_load" 7
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "load,both")
|
||||
(eq_attr "type" "fop,fmul")))
|
||||
"yf_decoder012,yf_agu,yf_feu")
|
||||
|
||||
(define_insn_reservation "yf_fop_store" 3
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "store")
|
||||
(eq_attr "type" "fop")))
|
||||
"yf_decodern,yf_agu,yf_feu")
|
||||
|
||||
(define_insn_reservation "yongfeng_fdiv_fpspc" 14
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "none")
|
||||
(eq_attr "type" "fdiv,fpspc")))
|
||||
"yf_decodern,yf_fdiv*7")
|
||||
|
||||
(define_insn_reservation "yongfeng_fdiv_fpspc_load" 18
|
||||
(and (eq_attr "cpu" "yongfeng")
|
||||
(and (eq_attr "memory" "load")
|
||||
(eq_attr "type" "fdiv,fpspc")))
|
||||
"yf_decoder012,yf_agu,yf_fdiv*7")
|
@ -22724,6 +22724,9 @@ Intel Knights Mill CPU.
|
||||
@item lujiazui
|
||||
ZHAOXIN lujiazui CPU.
|
||||
|
||||
@item yongfeng
|
||||
ZHAOXIN yongfeng CPU.
|
||||
|
||||
@item amdfam10h
|
||||
AMD Family 10h CPU.
|
||||
|
||||
|
@ -33313,6 +33313,12 @@ ZHAOXIN lujiazui CPU with x86-64, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1,
|
||||
SSE4.2, AVX, POPCNT, AES, PCLMUL, RDRND, XSAVE, XSAVEOPT, FSGSBASE, CX16,
|
||||
ABM, BMI, BMI2, F16C, FXSR, RDSEED instruction set support.
|
||||
|
||||
@item yongfeng
|
||||
ZHAOXIN yongfeng CPU with x86-64, MOVBE, MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1,
|
||||
SSE4.2, AVX, POPCNT, AES, PCLMUL, RDRND, XSAVE, XSAVEOPT, FSGSBASE, CX16,
|
||||
ABM, BMI, BMI2, F16C, FXSR, RDSEED, AVX2, FMA, SHA, LZCNT
|
||||
instruction set support.
|
||||
|
||||
@item geode
|
||||
AMD Geode embedded processor with MMX and 3DNow!@: instruction set support.
|
||||
@end table
|
||||
|
@ -17,6 +17,9 @@ int __attribute__ ((target("arch=lujiazui"))) foo () {
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Version of foo carrying the target ("arch=yongfeng") attribute.
   Returns 2; main asserts val == 2 when __builtin_cpu_is ("yongfeng")
   holds (val is presumably the result of calling foo -- the assignment
   is outside this excerpt).  */
int __attribute__ ((target("arch=yongfeng"))) foo () {
|
||||
return 2;
|
||||
}
|
||||
|
||||
int main ()
|
||||
{
|
||||
@ -24,6 +27,8 @@ int main ()
|
||||
|
||||
if (__builtin_cpu_is ("lujiazui"))
|
||||
assert (val == 1);
|
||||
else if (__builtin_cpu_is ("yongfeng"))
|
||||
assert (val == 2);
|
||||
else
|
||||
assert (val == 0);
|
||||
|
||||
|
@ -218,7 +218,8 @@ extern void test_arch_graniterapids_d (void) __attribute__((__target__("arch=gra
|
||||
extern void test_arch_arrowlake (void) __attribute__((__target__("arch=arrowlake")));
|
||||
extern void test_arch_arrowlake_s (void) __attribute__((__target__("arch=arrowlake-s")));
|
||||
extern void test_arch_pantherlake (void) __attribute__((__target__("arch=pantherlake")));
|
||||
extern void test_arch_lujiazui (void) __attribute__((__target__("arch=lujiazui")));
|
||||
extern void test_arch_lujiazui (void) __attribute__((__target__("arch=lujiazui")));
|
||||
extern void test_arch_yongfeng (void) __attribute__((__target__("arch=yongfeng")));
|
||||
extern void test_arch_k8 (void) __attribute__((__target__("arch=k8")));
|
||||
extern void test_arch_k8_sse3 (void) __attribute__((__target__("arch=k8-sse3")));
|
||||
extern void test_arch_opteron (void) __attribute__((__target__("arch=opteron")));
|
||||
@ -241,7 +242,8 @@ extern void test_tune_core2 (void) __attribute__((__target__("tune=core2")));
|
||||
extern void test_tune_corei7 (void) __attribute__((__target__("tune=corei7")));
|
||||
extern void test_tune_corei7_avx (void) __attribute__((__target__("tune=corei7-avx")));
|
||||
extern void test_tune_core_avx2 (void) __attribute__((__target__("tune=core-avx2")));
|
||||
extern void test_tune_lujiazui (void) __attribute__((__target__("tune=lujiazui")));
|
||||
extern void test_tune_lujiazui (void) __attribute__((__target__("tune=lujiazui")));
|
||||
extern void test_tune_yongfeng (void) __attribute__((__target__("tune=yongfeng")));
|
||||
extern void test_tune_k8 (void) __attribute__((__target__("tune=k8")));
|
||||
extern void test_tune_k8_sse3 (void) __attribute__((__target__("tune=k8-sse3")));
|
||||
extern void test_tune_opteron (void) __attribute__((__target__("tune=opteron")));
|
||||
|
Loading…
Reference in New Issue
Block a user