mirror of
https://sourceware.org/git/glibc.git
synced 2024-11-24 02:03:35 +08:00
Check the HTT bit before counting logical threads
Skip counting logical threads for Intel processors if the HTT bit is 0 which indicates there is only a single logical processor. * sysdeps/x86/cacheinfo.c (init_cacheinfo): Skip counting logical threads if the HTT bit is 0. * sysdeps/x86/cpu-features.h (bit_cpu_HTT): New. (index_cpu_HTT): Likewise. (reg_HTT): Likewise.
This commit is contained in:
parent
eb2c88c7c8
commit
7c08d791ee
@ -1,3 +1,11 @@
|
||||
2016-05-19 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
* sysdeps/x86/cacheinfo.c (init_cacheinfo): Skip counting
|
||||
logical threads if the HTT bit is 0.
|
||||
* sysdeps/x86/cpu-features.h (bit_cpu_HTT): New.
|
||||
(index_cpu_HTT): Likewise.
|
||||
(reg_HTT): Likewise.
|
||||
|
||||
2016-05-19 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
[BZ #20115]
|
||||
|
@ -506,99 +506,105 @@ init_cacheinfo (void)
|
||||
shared = core;
|
||||
}
|
||||
|
||||
/* Figure out the number of logical threads that share the
|
||||
highest cache level. */
|
||||
if (max_cpuid >= 4)
|
||||
/* A value of 0 for the HTT bit indicates there is only a single
|
||||
logical processor. */
|
||||
if (HAS_CPU_FEATURE (HTT))
|
||||
{
|
||||
unsigned int family = GLRO(dl_x86_cpu_features).family;
|
||||
unsigned int model = GLRO(dl_x86_cpu_features).model;
|
||||
|
||||
int i = 0;
|
||||
|
||||
/* Query until desired cache level is enumerated. */
|
||||
do
|
||||
/* Figure out the number of logical threads that share the
|
||||
highest cache level. */
|
||||
if (max_cpuid >= 4)
|
||||
{
|
||||
__cpuid_count (4, i++, eax, ebx, ecx, edx);
|
||||
unsigned int family = GLRO(dl_x86_cpu_features).family;
|
||||
unsigned int model = GLRO(dl_x86_cpu_features).model;
|
||||
|
||||
/* There seems to be a bug in at least some Pentium Ds
|
||||
which sometimes fail to iterate all cache parameters.
|
||||
Do not loop indefinitely here, stop in this case and
|
||||
assume there is no such information. */
|
||||
if ((eax & 0x1f) == 0)
|
||||
goto intel_bug_no_cache_info;
|
||||
}
|
||||
while (((eax >> 5) & 0x7) != level);
|
||||
int i = 0;
|
||||
|
||||
/* Check if cache is inclusive of lower cache levels. */
|
||||
inclusive_cache = (edx & 0x2) != 0;
|
||||
|
||||
threads = (eax >> 14) & 0x3ff;
|
||||
|
||||
/* If max_cpuid >= 11, THREADS is the maximum number of
|
||||
addressable IDs for logical processors sharing the
|
||||
cache, instead of the maximum number of threads
|
||||
sharing the cache. */
|
||||
if (threads && max_cpuid >= 11)
|
||||
{
|
||||
/* Find the number of logical processors shipped in
|
||||
one core and apply count mask. */
|
||||
i = 0;
|
||||
while (1)
|
||||
/* Query until desired cache level is enumerated. */
|
||||
do
|
||||
{
|
||||
__cpuid_count (11, i++, eax, ebx, ecx, edx);
|
||||
__cpuid_count (4, i++, eax, ebx, ecx, edx);
|
||||
|
||||
int shipped = ebx & 0xff;
|
||||
int type = ecx & 0xff0;
|
||||
if (shipped == 0 || type == 0)
|
||||
break;
|
||||
else if (type == 0x200)
|
||||
/* There seems to be a bug in at least some Pentium Ds
|
||||
which sometimes fail to iterate all cache parameters.
|
||||
Do not loop indefinitely here, stop in this case and
|
||||
assume there is no such information. */
|
||||
if ((eax & 0x1f) == 0)
|
||||
goto intel_bug_no_cache_info;
|
||||
}
|
||||
while (((eax >> 5) & 0x7) != level);
|
||||
|
||||
/* Check if cache is inclusive of lower cache levels. */
|
||||
inclusive_cache = (edx & 0x2) != 0;
|
||||
|
||||
threads = (eax >> 14) & 0x3ff;
|
||||
|
||||
/* If max_cpuid >= 11, THREADS is the maximum number of
|
||||
addressable IDs for logical processors sharing the
|
||||
cache, instead of the maximum number of threads
|
||||
sharing the cache. */
|
||||
if (threads && max_cpuid >= 11)
|
||||
{
|
||||
/* Find the number of logical processors shipped in
|
||||
one core and apply count mask. */
|
||||
i = 0;
|
||||
while (1)
|
||||
{
|
||||
int count_mask;
|
||||
__cpuid_count (11, i++, eax, ebx, ecx, edx);
|
||||
|
||||
/* Compute count mask. */
|
||||
asm ("bsr %1, %0"
|
||||
: "=r" (count_mask) : "g" (threads));
|
||||
count_mask = ~(-1 << (count_mask + 1));
|
||||
threads = (shipped - 1) & count_mask;
|
||||
int shipped = ebx & 0xff;
|
||||
int type = ecx & 0xff0;
|
||||
if (shipped == 0 || type == 0)
|
||||
break;
|
||||
else if (type == 0x200)
|
||||
{
|
||||
int count_mask;
|
||||
|
||||
/* Compute count mask. */
|
||||
asm ("bsr %1, %0"
|
||||
: "=r" (count_mask) : "g" (threads));
|
||||
count_mask = ~(-1 << (count_mask + 1));
|
||||
threads = (shipped - 1) & count_mask;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
threads += 1;
|
||||
if (threads > 2 && level == 2 && family == 6)
|
||||
{
|
||||
switch (model)
|
||||
{
|
||||
case 0x57:
|
||||
/* Knights Landing has L2 cache shared by 2 cores. */
|
||||
case 0x37:
|
||||
case 0x4a:
|
||||
case 0x4d:
|
||||
case 0x5a:
|
||||
case 0x5d:
|
||||
/* Silvermont has L2 cache shared by 2 cores. */
|
||||
threads = 2;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
threads += 1;
|
||||
if (threads > 2 && level == 2 && family == 6)
|
||||
else
|
||||
{
|
||||
switch (model)
|
||||
{
|
||||
case 0x57:
|
||||
/* Knights Landing has L2 cache shared by 2 cores. */
|
||||
case 0x37:
|
||||
case 0x4a:
|
||||
case 0x4d:
|
||||
case 0x5a:
|
||||
case 0x5d:
|
||||
/* Silvermont has L2 cache shared by 2 cores. */
|
||||
threads = 2;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
intel_bug_no_cache_info:
|
||||
/* Assume that all logical threads share the highest cache
|
||||
level. */
|
||||
|
||||
threads
|
||||
= ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx
|
||||
>> 16) & 0xff);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
intel_bug_no_cache_info:
|
||||
/* Assume that all logical threads share the highest cache level. */
|
||||
|
||||
threads
|
||||
= ((GLRO(dl_x86_cpu_features).cpuid[COMMON_CPUID_INDEX_1].ebx
|
||||
>> 16) & 0xff);
|
||||
/* Cap usage of highest cache level to the number of supported
|
||||
threads. */
|
||||
if (shared > 0 && threads > 0)
|
||||
shared /= threads;
|
||||
}
|
||||
|
||||
/* Cap usage of highest cache level to the number of supported
|
||||
threads. */
|
||||
if (shared > 0 && threads > 0)
|
||||
shared /= threads;
|
||||
|
||||
/* Account for non-inclusive L2 and L3 caches. */
|
||||
if (level == 3 && !inclusive_cache)
|
||||
shared += core;
|
||||
|
@ -51,6 +51,7 @@
|
||||
#define bit_cpu_POPCOUNT (1 << 23)
|
||||
#define bit_cpu_FMA (1 << 12)
|
||||
#define bit_cpu_FMA4 (1 << 16)
|
||||
#define bit_cpu_HTT (1 << 28)
|
||||
|
||||
/* COMMON_CPUID_INDEX_7. */
|
||||
#define bit_cpu_ERMS (1 << 9)
|
||||
@ -235,6 +236,7 @@ extern const struct cpu_features *__get_cpu_features (void)
|
||||
# define index_cpu_FMA4 COMMON_CPUID_INDEX_80000001
|
||||
# define index_cpu_POPCOUNT COMMON_CPUID_INDEX_1
|
||||
# define index_cpu_OSXSAVE COMMON_CPUID_INDEX_1
|
||||
# define index_cpu_HTT COMMON_CPUID_INDEX_1
|
||||
|
||||
# define reg_CX8 edx
|
||||
# define reg_CMOV edx
|
||||
@ -252,6 +254,7 @@ extern const struct cpu_features *__get_cpu_features (void)
|
||||
# define reg_FMA4 ecx
|
||||
# define reg_POPCOUNT ecx
|
||||
# define reg_OSXSAVE ecx
|
||||
# define reg_HTT edx
|
||||
|
||||
# define index_arch_Fast_Rep_String FEATURE_INDEX_1
|
||||
# define index_arch_Fast_Copy_Backward FEATURE_INDEX_1
|
||||
|
Loading…
Reference in New Issue
Block a user