x86[_64]cpuid.pl: further refine shared cache detection.

This commit is contained in:
Andy Polyakov 2009-05-14 18:17:26 +00:00
parent 6f71e5ee6a
commit 761393bba7
2 changed files with 63 additions and 9 deletions

View File

@ -50,6 +50,8 @@ OPENSSL_ia32_cpuid:
xor %eax,%eax
cpuid
mov %eax,%r11d # max value for standard query level
xor %eax,%eax
cmp \$0x756e6547,%ebx # "Genu"
setne %al
@ -60,7 +62,6 @@ OPENSSL_ia32_cpuid:
cmp \$0x6c65746e,%ecx # "ntel"
setne %al
or %eax,%r9d # 0 indicates Intel CPU
mov \$1,%r10d # "number of [AMD] cores"
jz .Lintel
cmp \$0x68747541,%ebx # "Auth"
@ -74,10 +75,10 @@ OPENSSL_ia32_cpuid:
or %eax,%r10d # 0 indicates AMD CPU
jnz .Lintel
# AMD specific
mov \$0x80000000,%eax
cpuid
cmp \$0x80000008,%eax
mov \$1,%r10d # "number of [AMD] cores"
jb .Lintel
mov \$0x80000008,%eax
@ -85,7 +86,29 @@ OPENSSL_ia32_cpuid:
movzb %cl,%r10 # number of cores - 1
inc %r10 # number of cores
mov \$1,%eax
cpuid
bt \$28,%edx # test hyper-threading bit
jnc .Ldone
shr \$16,%ebx # number of logical processors
cmp %r10b,%bl
ja .Ldone
and \$0xefffffff,%edx # ~(1<<28)
jmp .Ldone
.Lintel:
cmp \$4,%r11d
mov \$-1,%r10d
jb .Lnocacheinfo
mov \$4,%eax
mov \$0,%ecx # query L1D
cpuid
mov %eax,%r10d
shr \$14,%r10d
and \$0xfff,%r10d # number of cores -1 per L1D
.Lnocacheinfo:
mov \$1,%eax
cpuid
cmp \$0,%r9d
@ -98,8 +121,13 @@ OPENSSL_ia32_cpuid:
.Lnotintel:
bt \$28,%edx # test hyper-threading bit
jnc .Ldone
and \$0xefffffff,%edx # ~(1<<28)
cmp \$0,%r10d
je .Ldone
or \$0x10000000,%edx # 1<<28
shr \$16,%ebx
cmp %r10b,%bl # see if cache is shared
cmp \$1,%bl # see if cache is shared
ja .Ldone
and \$0xefffffff,%edx # ~(1<<28)
.Ldone:

View File

@ -23,6 +23,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&jnc (&label("done"));
&xor ("eax","eax");
&cpuid ();
&mov ("edi","eax"); # max value for standard query level
&xor ("eax","eax");
&cmp ("ebx",0x756e6547); # "Genu"
&setne (&LB("eax"));
@ -33,7 +35,6 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&cmp ("ecx",0x6c65746e); # "ntel"
&setne (&LB("eax"));
&or ("ebp","eax"); # 0 indicates Intel CPU
&mov ("esi",1); # "number of [AMD] cores"
&jz (&label("intel"));
&cmp ("ebx",0x68747541); # "Auth"
@ -47,10 +48,10 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&or ("esi","eax"); # 0 indicates AMD CPU
&jnz (&label("intel"));
# AMD specific
&mov ("eax",0x80000000);
&cpuid ();
&cmp ("eax",0x80000008);
&mov ("esi",1); # "number of [AMD] cores"
&jb (&label("intel"));
&mov ("eax",0x80000008);
@ -58,7 +59,30 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&movz ("esi",&LB("ecx")); # number of cores - 1
&inc ("esi"); # number of cores
&mov ("eax",1);
&cpuid ();
&bt ("edx",28);
&jnc (&label("done"));
&shr ("ebx",16);
&and ("ebx",0xff);
&cmp ("ebx","esi");
&ja (&label("done"));
&and ("edx",0xefffffff); # clear hyper-threading bit
&jmp (&label("done"));
&set_label("intel");
&cmp ("edi",4);
&mov ("edi",-1);
&jb (&label("nocacheinfo"));
&mov ("eax",4);
&mov ("ecx",0); # query L1D
&cpuid ();
&mov ("edi","eax");
&shr ("edi",14);
&and ("edi",0xfff); # number of cores -1 per L1D
&set_label("nocacheinfo");
&mov ("eax",1);
&cpuid ();
&cmp ("ebp",0);
@ -70,17 +94,19 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&set_label("notP4");
&bt ("edx",28); # test hyper-threading bit
&jnc (&label("done"));
&and ("edx",0xefffffff);
&cmp ("edi",0);
&je (&label("done"));
&or ("edx",0x10000000);
&shr ("ebx",16);
&and ("ebx",0xff);
&cmp ("ebx","esi"); # see if cache is shared(*)
&cmp (&LB("ebx"),1);
&ja (&label("done"));
&and ("edx",0xefffffff); # clear hyper-threading bit if not
&set_label("done");
&mov ("eax","edx");
&mov ("edx","ecx");
&function_end("OPENSSL_ia32_cpuid");
# (*) on Core2 this value is set to 2 denoting the fact that L2
# cache is shared between cores.
&external_label("OPENSSL_ia32cap_P");