mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-27 03:33:33 +08:00
x86: Add support for AVX10 preset and vec size in cpu-features
This commit adds support for the new AVX10 CPU features: https://cdrdv2-public.intel.com/784267/355989-intel-avx10-spec.pdf We add checks for: - `AVX10`: Check if AVX10 is present. - `AVX10_{X,Y,Z}MM`: Check if a given vec class has AVX10 support. `make check` passes and cpuid output was checked against GNR/DMR on an emulator.
This commit is contained in:
parent
5f913506f4
commit
d90b43a4ed
@ -222,6 +222,18 @@ Leaf (EAX = 23H).
|
|||||||
@item
|
@item
|
||||||
@code{AVX} -- The AVX instruction extensions.
|
@code{AVX} -- The AVX instruction extensions.
|
||||||
|
|
||||||
|
@item
|
||||||
|
@code{AVX10} -- The AVX10 instruction extensions.
|
||||||
|
|
||||||
|
@item
|
||||||
|
@code{AVX10_XMM} -- Whether AVX10 includes xmm registers.
|
||||||
|
|
||||||
|
@item
|
||||||
|
@code{AVX10_YMM} -- Whether AVX10 includes ymm registers.
|
||||||
|
|
||||||
|
@item
|
||||||
|
@code{AVX10_ZMM} -- Whether AVX10 includes zmm registers.
|
||||||
|
|
||||||
@item
|
@item
|
||||||
@code{AVX2} -- The AVX2 instruction extensions.
|
@code{AVX2} -- The AVX2 instruction extensions.
|
||||||
|
|
||||||
|
@ -30,7 +30,8 @@ enum
|
|||||||
CPUID_INDEX_80000008,
|
CPUID_INDEX_80000008,
|
||||||
CPUID_INDEX_7_ECX_1,
|
CPUID_INDEX_7_ECX_1,
|
||||||
CPUID_INDEX_19,
|
CPUID_INDEX_19,
|
||||||
CPUID_INDEX_14_ECX_0
|
CPUID_INDEX_14_ECX_0,
|
||||||
|
CPUID_INDEX_24_ECX_0
|
||||||
};
|
};
|
||||||
|
|
||||||
struct cpuid_feature
|
struct cpuid_feature
|
||||||
@ -312,6 +313,7 @@ enum
|
|||||||
x86_cpu_AVX_NE_CONVERT = x86_cpu_index_7_ecx_1_edx + 5,
|
x86_cpu_AVX_NE_CONVERT = x86_cpu_index_7_ecx_1_edx + 5,
|
||||||
x86_cpu_AMX_COMPLEX = x86_cpu_index_7_ecx_1_edx + 8,
|
x86_cpu_AMX_COMPLEX = x86_cpu_index_7_ecx_1_edx + 8,
|
||||||
x86_cpu_PREFETCHI = x86_cpu_index_7_ecx_1_edx + 14,
|
x86_cpu_PREFETCHI = x86_cpu_index_7_ecx_1_edx + 14,
|
||||||
|
x86_cpu_AVX10 = x86_cpu_index_7_ecx_1_edx + 19,
|
||||||
x86_cpu_APX_F = x86_cpu_index_7_ecx_1_edx + 21,
|
x86_cpu_APX_F = x86_cpu_index_7_ecx_1_edx + 21,
|
||||||
|
|
||||||
x86_cpu_index_19_ebx
|
x86_cpu_index_19_ebx
|
||||||
@ -325,5 +327,13 @@ enum
|
|||||||
= (CPUID_INDEX_14_ECX_0 * 8 * 4 * sizeof (unsigned int)
|
= (CPUID_INDEX_14_ECX_0 * 8 * 4 * sizeof (unsigned int)
|
||||||
+ cpuid_register_index_ebx * 8 * sizeof (unsigned int)),
|
+ cpuid_register_index_ebx * 8 * sizeof (unsigned int)),
|
||||||
|
|
||||||
x86_cpu_PTWRITE = x86_cpu_index_14_ecx_0_ebx + 4
|
x86_cpu_PTWRITE = x86_cpu_index_14_ecx_0_ebx + 4,
|
||||||
|
|
||||||
|
x86_cpu_index_24_ecx_0_ebx
|
||||||
|
= (CPUID_INDEX_24_ECX_0 * 8 * 4 * sizeof (unsigned int)
|
||||||
|
+ cpuid_register_index_ebx * 8 * sizeof (unsigned int)),
|
||||||
|
|
||||||
|
x86_cpu_AVX10_XMM = x86_cpu_index_24_ecx_0_ebx + 16,
|
||||||
|
x86_cpu_AVX10_YMM = x86_cpu_index_24_ecx_0_ebx + 17,
|
||||||
|
x86_cpu_AVX10_ZMM = x86_cpu_index_24_ecx_0_ebx + 18,
|
||||||
};
|
};
|
||||||
|
@ -115,11 +115,18 @@ update_active (struct cpu_features *cpu_features)
|
|||||||
CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK);
|
CPU_FEATURE_SET_ACTIVE (cpu_features, SHSTK);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
enum
|
||||||
|
{
|
||||||
|
os_xmm = 1,
|
||||||
|
os_ymm = 2,
|
||||||
|
os_zmm = 4
|
||||||
|
} os_vector_size = os_xmm;
|
||||||
/* Can we call xgetbv? */
|
/* Can we call xgetbv? */
|
||||||
if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
|
if (CPU_FEATURES_CPU_P (cpu_features, OSXSAVE))
|
||||||
{
|
{
|
||||||
unsigned int xcrlow;
|
unsigned int xcrlow;
|
||||||
unsigned int xcrhigh;
|
unsigned int xcrhigh;
|
||||||
|
CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10);
|
||||||
asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
|
asm ("xgetbv" : "=a" (xcrlow), "=d" (xcrhigh) : "c" (0));
|
||||||
/* Is YMM and XMM state usable? */
|
/* Is YMM and XMM state usable? */
|
||||||
if ((xcrlow & (bit_YMM_state | bit_XMM_state))
|
if ((xcrlow & (bit_YMM_state | bit_XMM_state))
|
||||||
@ -128,6 +135,7 @@ update_active (struct cpu_features *cpu_features)
|
|||||||
/* Determine if AVX is usable. */
|
/* Determine if AVX is usable. */
|
||||||
if (CPU_FEATURES_CPU_P (cpu_features, AVX))
|
if (CPU_FEATURES_CPU_P (cpu_features, AVX))
|
||||||
{
|
{
|
||||||
|
os_vector_size |= os_ymm;
|
||||||
CPU_FEATURE_SET (cpu_features, AVX);
|
CPU_FEATURE_SET (cpu_features, AVX);
|
||||||
/* The following features depend on AVX being usable. */
|
/* The following features depend on AVX being usable. */
|
||||||
/* Determine if AVX2 is usable. */
|
/* Determine if AVX2 is usable. */
|
||||||
@ -166,6 +174,7 @@ update_active (struct cpu_features *cpu_features)
|
|||||||
| bit_ZMM16_31_state))
|
| bit_ZMM16_31_state))
|
||||||
== (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
|
== (bit_Opmask_state | bit_ZMM0_15_state | bit_ZMM16_31_state))
|
||||||
{
|
{
|
||||||
|
os_vector_size |= os_zmm;
|
||||||
/* Determine if AVX512F is usable. */
|
/* Determine if AVX512F is usable. */
|
||||||
if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
|
if (CPU_FEATURES_CPU_P (cpu_features, AVX512F))
|
||||||
{
|
{
|
||||||
@ -210,6 +219,22 @@ update_active (struct cpu_features *cpu_features)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (CPU_FEATURES_CPU_P (cpu_features, AVX10)
|
||||||
|
&& cpu_features->basic.max_cpuid >= 0x24)
|
||||||
|
{
|
||||||
|
__cpuid_count (
|
||||||
|
0x24, 0, cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.eax,
|
||||||
|
cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ebx,
|
||||||
|
cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.ecx,
|
||||||
|
cpu_features->features[CPUID_INDEX_24_ECX_0].cpuid.edx);
|
||||||
|
if (os_vector_size & os_xmm)
|
||||||
|
CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_XMM);
|
||||||
|
if (os_vector_size & os_ymm)
|
||||||
|
CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_YMM);
|
||||||
|
if (os_vector_size & os_zmm)
|
||||||
|
CPU_FEATURE_SET_ACTIVE (cpu_features, AVX10_ZMM);
|
||||||
|
}
|
||||||
|
|
||||||
/* Are XTILECFG and XTILEDATA states usable? */
|
/* Are XTILECFG and XTILEDATA states usable? */
|
||||||
if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
|
if ((xcrlow & (bit_XTILECFG_state | bit_XTILEDATA_state))
|
||||||
== (bit_XTILECFG_state | bit_XTILEDATA_state))
|
== (bit_XTILECFG_state | bit_XTILEDATA_state))
|
||||||
|
@ -29,7 +29,7 @@
|
|||||||
|
|
||||||
enum
|
enum
|
||||||
{
|
{
|
||||||
CPUID_INDEX_MAX = CPUID_INDEX_14_ECX_0 + 1
|
CPUID_INDEX_MAX = CPUID_INDEX_24_ECX_0 + 1
|
||||||
};
|
};
|
||||||
|
|
||||||
enum
|
enum
|
||||||
@ -319,6 +319,7 @@ enum
|
|||||||
#define bit_cpu_AVX_NE_CONVERT (1u << 5)
|
#define bit_cpu_AVX_NE_CONVERT (1u << 5)
|
||||||
#define bit_cpu_AMX_COMPLEX (1u << 8)
|
#define bit_cpu_AMX_COMPLEX (1u << 8)
|
||||||
#define bit_cpu_PREFETCHI (1u << 14)
|
#define bit_cpu_PREFETCHI (1u << 14)
|
||||||
|
#define bit_cpu_AVX10 (1u << 19)
|
||||||
#define bit_cpu_APX_F (1u << 21)
|
#define bit_cpu_APX_F (1u << 21)
|
||||||
|
|
||||||
/* CPUID_INDEX_19. */
|
/* CPUID_INDEX_19. */
|
||||||
@ -332,6 +333,13 @@ enum
|
|||||||
/* EBX. */
|
/* EBX. */
|
||||||
#define bit_cpu_PTWRITE (1u << 4)
|
#define bit_cpu_PTWRITE (1u << 4)
|
||||||
|
|
||||||
|
/* CPUID_INDEX_24_ECX_0. */
|
||||||
|
|
||||||
|
/* EBX. */
|
||||||
|
#define bit_cpu_AVX10_XMM (1u << 16)
|
||||||
|
#define bit_cpu_AVX10_YMM (1u << 17)
|
||||||
|
#define bit_cpu_AVX10_ZMM (1u << 18)
|
||||||
|
|
||||||
/* CPUID_INDEX_1. */
|
/* CPUID_INDEX_1. */
|
||||||
|
|
||||||
/* ECX. */
|
/* ECX. */
|
||||||
@ -563,6 +571,7 @@ enum
|
|||||||
#define index_cpu_AVX_NE_CONVERT CPUID_INDEX_7_ECX_1
|
#define index_cpu_AVX_NE_CONVERT CPUID_INDEX_7_ECX_1
|
||||||
#define index_cpu_AMX_COMPLEX CPUID_INDEX_7_ECX_1
|
#define index_cpu_AMX_COMPLEX CPUID_INDEX_7_ECX_1
|
||||||
#define index_cpu_PREFETCHI CPUID_INDEX_7_ECX_1
|
#define index_cpu_PREFETCHI CPUID_INDEX_7_ECX_1
|
||||||
|
#define index_cpu_AVX10 CPUID_INDEX_7_ECX_1
|
||||||
#define index_cpu_APX_F CPUID_INDEX_7_ECX_1
|
#define index_cpu_APX_F CPUID_INDEX_7_ECX_1
|
||||||
|
|
||||||
/* CPUID_INDEX_19. */
|
/* CPUID_INDEX_19. */
|
||||||
@ -576,6 +585,13 @@ enum
|
|||||||
/* EBX. */
|
/* EBX. */
|
||||||
#define index_cpu_PTWRITE CPUID_INDEX_14_ECX_0
|
#define index_cpu_PTWRITE CPUID_INDEX_14_ECX_0
|
||||||
|
|
||||||
|
/* CPUID_INDEX_24_ECX_0. */
|
||||||
|
|
||||||
|
/* EBX. */
|
||||||
|
#define index_cpu_AVX10_XMM CPUID_INDEX_24_ECX_0
|
||||||
|
#define index_cpu_AVX10_YMM CPUID_INDEX_24_ECX_0
|
||||||
|
#define index_cpu_AVX10_ZMM CPUID_INDEX_24_ECX_0
|
||||||
|
|
||||||
/* CPUID_INDEX_1. */
|
/* CPUID_INDEX_1. */
|
||||||
|
|
||||||
/* ECX. */
|
/* ECX. */
|
||||||
@ -809,6 +825,7 @@ enum
|
|||||||
#define reg_AVX_NE_CONVERT edx
|
#define reg_AVX_NE_CONVERT edx
|
||||||
#define reg_AMX_COMPLEX edx
|
#define reg_AMX_COMPLEX edx
|
||||||
#define reg_PREFETCHI edx
|
#define reg_PREFETCHI edx
|
||||||
|
#define reg_AVX10 edx
|
||||||
#define reg_APX_F edx
|
#define reg_APX_F edx
|
||||||
|
|
||||||
/* CPUID_INDEX_19. */
|
/* CPUID_INDEX_19. */
|
||||||
@ -822,6 +839,14 @@ enum
|
|||||||
/* EBX. */
|
/* EBX. */
|
||||||
#define reg_PTWRITE ebx
|
#define reg_PTWRITE ebx
|
||||||
|
|
||||||
|
/* CPUID_INDEX_24_ECX_0. */
|
||||||
|
|
||||||
|
/* EBX. */
|
||||||
|
#define reg_AVX10_XMM ebx
|
||||||
|
#define reg_AVX10_YMM ebx
|
||||||
|
#define reg_AVX10_ZMM ebx
|
||||||
|
|
||||||
|
|
||||||
/* PREFERRED_FEATURE_INDEX_1. First define the bitindex values
|
/* PREFERRED_FEATURE_INDEX_1. First define the bitindex values
|
||||||
sequentially, then define the bit_arch* and index_arch_* lookup
|
sequentially, then define the bit_arch* and index_arch_* lookup
|
||||||
constants. */
|
constants. */
|
||||||
|
@ -219,6 +219,7 @@ do_test (void)
|
|||||||
CHECK_CPU_FEATURE_PRESENT (AVX_NE_CONVERT);
|
CHECK_CPU_FEATURE_PRESENT (AVX_NE_CONVERT);
|
||||||
CHECK_CPU_FEATURE_PRESENT (AMX_COMPLEX);
|
CHECK_CPU_FEATURE_PRESENT (AMX_COMPLEX);
|
||||||
CHECK_CPU_FEATURE_PRESENT (PREFETCHI);
|
CHECK_CPU_FEATURE_PRESENT (PREFETCHI);
|
||||||
|
CHECK_CPU_FEATURE_PRESENT (AVX10);
|
||||||
CHECK_CPU_FEATURE_PRESENT (APX_F);
|
CHECK_CPU_FEATURE_PRESENT (APX_F);
|
||||||
CHECK_CPU_FEATURE_PRESENT (AESKLE);
|
CHECK_CPU_FEATURE_PRESENT (AESKLE);
|
||||||
CHECK_CPU_FEATURE_PRESENT (WIDE_KL);
|
CHECK_CPU_FEATURE_PRESENT (WIDE_KL);
|
||||||
@ -391,11 +392,18 @@ do_test (void)
|
|||||||
CHECK_CPU_FEATURE_ACTIVE (AVX_NE_CONVERT);
|
CHECK_CPU_FEATURE_ACTIVE (AVX_NE_CONVERT);
|
||||||
CHECK_CPU_FEATURE_ACTIVE (AMX_COMPLEX);
|
CHECK_CPU_FEATURE_ACTIVE (AMX_COMPLEX);
|
||||||
CHECK_CPU_FEATURE_ACTIVE (PREFETCHI);
|
CHECK_CPU_FEATURE_ACTIVE (PREFETCHI);
|
||||||
|
CHECK_CPU_FEATURE_ACTIVE (AVX10);
|
||||||
CHECK_CPU_FEATURE_ACTIVE (APX_F);
|
CHECK_CPU_FEATURE_ACTIVE (APX_F);
|
||||||
CHECK_CPU_FEATURE_ACTIVE (AESKLE);
|
CHECK_CPU_FEATURE_ACTIVE (AESKLE);
|
||||||
CHECK_CPU_FEATURE_ACTIVE (WIDE_KL);
|
CHECK_CPU_FEATURE_ACTIVE (WIDE_KL);
|
||||||
CHECK_CPU_FEATURE_ACTIVE (PTWRITE);
|
CHECK_CPU_FEATURE_ACTIVE (PTWRITE);
|
||||||
|
|
||||||
|
if (CPU_FEATURE_ACTIVE (AVX10))
|
||||||
|
{
|
||||||
|
CHECK_CPU_FEATURE_ACTIVE (AVX10_XMM);
|
||||||
|
CHECK_CPU_FEATURE_ACTIVE (AVX10_YMM);
|
||||||
|
CHECK_CPU_FEATURE_ACTIVE (AVX10_ZMM);
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user