mirror of
https://github.com/openssl/openssl.git
synced 2024-12-14 20:43:46 +08:00
x86[_64]cpuid.pl: further refine shared cache detection.
This commit is contained in:
parent
6f71e5ee6a
commit
761393bba7
@ -50,6 +50,8 @@ OPENSSL_ia32_cpuid:
|
||||
|
||||
xor %eax,%eax
|
||||
cpuid
|
||||
mov %eax,%r11d # max value for standard query level
|
||||
|
||||
xor %eax,%eax
|
||||
cmp \$0x756e6547,%ebx # "Genu"
|
||||
setne %al
|
||||
@ -60,7 +62,6 @@ OPENSSL_ia32_cpuid:
|
||||
cmp \$0x6c65746e,%ecx # "ntel"
|
||||
setne %al
|
||||
or %eax,%r9d # 0 indicates Intel CPU
|
||||
mov \$1,%r10d # "number of [AMD] cores"
|
||||
jz .Lintel
|
||||
|
||||
cmp \$0x68747541,%ebx # "Auth"
|
||||
@ -74,10 +75,10 @@ OPENSSL_ia32_cpuid:
|
||||
or %eax,%r10d # 0 indicates AMD CPU
|
||||
jnz .Lintel
|
||||
|
||||
# AMD specific
|
||||
mov \$0x80000000,%eax
|
||||
cpuid
|
||||
cmp \$0x80000008,%eax
|
||||
mov \$1,%r10d # "number of [AMD] cores"
|
||||
jb .Lintel
|
||||
|
||||
mov \$0x80000008,%eax
|
||||
@ -85,7 +86,29 @@ OPENSSL_ia32_cpuid:
|
||||
movzb %cl,%r10 # number of cores - 1
|
||||
inc %r10 # number of cores
|
||||
|
||||
mov \$1,%eax
|
||||
cpuid
|
||||
bt \$28,%edx # test hyper-threading bit
|
||||
jnc .Ldone
|
||||
shr \$16,%ebx # number of logical processors
|
||||
cmp %r10b,%bl
|
||||
ja .Ldone
|
||||
and \$0xefffffff,%edx # ~(1<<28)
|
||||
jmp .Ldone
|
||||
|
||||
.Lintel:
|
||||
cmp \$4,%r11d
|
||||
mov \$-1,%r10d
|
||||
jb .Lnocacheinfo
|
||||
|
||||
mov \$4,%eax
|
||||
mov \$0,%ecx # query L1D
|
||||
cpuid
|
||||
mov %eax,%r10d
|
||||
shr \$14,%r10d
|
||||
and \$0xfff,%r10d # number of cores -1 per L1D
|
||||
|
||||
.Lnocacheinfo:
|
||||
mov \$1,%eax
|
||||
cpuid
|
||||
cmp \$0,%r9d
|
||||
@ -98,8 +121,13 @@ OPENSSL_ia32_cpuid:
|
||||
.Lnotintel:
|
||||
bt \$28,%edx # test hyper-threading bit
|
||||
jnc .Ldone
|
||||
and \$0xefffffff,%edx # ~(1<<28)
|
||||
cmp \$0,%r10d
|
||||
je .Ldone
|
||||
|
||||
or \$0x10000000,%edx # 1<<28
|
||||
shr \$16,%ebx
|
||||
cmp %r10b,%bl # see if cache is shared
|
||||
cmp \$1,%bl # see if cache is shared
|
||||
ja .Ldone
|
||||
and \$0xefffffff,%edx # ~(1<<28)
|
||||
.Ldone:
|
||||
|
@ -23,6 +23,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
||||
&jnc (&label("done"));
|
||||
&xor ("eax","eax");
|
||||
&cpuid ();
|
||||
&mov ("edi","eax"); # max value for standard query level
|
||||
|
||||
&xor ("eax","eax");
|
||||
&cmp ("ebx",0x756e6547); # "Genu"
|
||||
&setne (&LB("eax"));
|
||||
@ -33,7 +35,6 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
||||
&cmp ("ecx",0x6c65746e); # "ntel"
|
||||
&setne (&LB("eax"));
|
||||
&or ("ebp","eax"); # 0 indicates Intel CPU
|
||||
&mov ("esi",1); # "number of [AMD] cores"
|
||||
&jz (&label("intel"));
|
||||
|
||||
&cmp ("ebx",0x68747541); # "Auth"
|
||||
@ -47,10 +48,10 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
||||
&or ("esi","eax"); # 0 indicates AMD CPU
|
||||
&jnz (&label("intel"));
|
||||
|
||||
# AMD specific
|
||||
&mov ("eax",0x80000000);
|
||||
&cpuid ();
|
||||
&cmp ("eax",0x80000008);
|
||||
&mov ("esi",1); # "number of [AMD] cores"
|
||||
&jb (&label("intel"));
|
||||
|
||||
&mov ("eax",0x80000008);
|
||||
@ -58,7 +59,30 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
||||
&movz ("esi",&LB("ecx")); # number of cores - 1
|
||||
&inc ("esi"); # number of cores
|
||||
|
||||
&mov ("eax",1);
|
||||
&cpuid ();
|
||||
&bt ("edx",28);
|
||||
&jnc (&label("done"));
|
||||
&shr ("ebx",16);
|
||||
&and ("ebx",0xff);
|
||||
&cmp ("ebx","esi");
|
||||
&ja (&label("done"));
|
||||
&and ("edx",0xefffffff); # clear hyper-threading bit
|
||||
&jmp (&label("done"));
|
||||
|
||||
&set_label("intel");
|
||||
&cmp ("edi",4);
|
||||
&mov ("edi",-1);
|
||||
&jb (&label("nocacheinfo"));
|
||||
|
||||
&mov ("eax",4);
|
||||
&mov ("ecx",0); # query L1D
|
||||
&cpuid ();
|
||||
&mov ("edi","eax");
|
||||
&shr ("edi",14);
|
||||
&and ("edi",0xfff); # number of cores -1 per L1D
|
||||
|
||||
&set_label("nocacheinfo");
|
||||
&mov ("eax",1);
|
||||
&cpuid ();
|
||||
&cmp ("ebp",0);
|
||||
@ -70,17 +94,19 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
||||
&set_label("notP4");
|
||||
&bt ("edx",28); # test hyper-threading bit
|
||||
&jnc (&label("done"));
|
||||
&and ("edx",0xefffffff);
|
||||
&cmp ("edi",0);
|
||||
&je (&label("done"));
|
||||
|
||||
&or ("edx",0x10000000);
|
||||
&shr ("ebx",16);
|
||||
&and ("ebx",0xff);
|
||||
&cmp ("ebx","esi"); # see if cache is shared(*)
|
||||
&cmp (&LB("ebx"),1);
|
||||
&ja (&label("done"));
|
||||
&and ("edx",0xefffffff); # clear hyper-threading bit if not
|
||||
&set_label("done");
|
||||
&mov ("eax","edx");
|
||||
&mov ("edx","ecx");
|
||||
&function_end("OPENSSL_ia32_cpuid");
|
||||
# (*) on Core2 this value is set to 2 denoting the fact that L2
|
||||
# cache is shared between cores.
|
||||
|
||||
&external_label("OPENSSL_ia32cap_P");
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user