mirror of
https://github.com/openssl/openssl.git
synced 2024-12-14 20:43:46 +08:00
x86[_64]cpuid.pl: further refine shared cache detection.
This commit is contained in:
parent
6f71e5ee6a
commit
761393bba7
@ -50,6 +50,8 @@ OPENSSL_ia32_cpuid:
|
|||||||
|
|
||||||
xor %eax,%eax
|
xor %eax,%eax
|
||||||
cpuid
|
cpuid
|
||||||
|
mov %eax,%r11d # max value for standard query level
|
||||||
|
|
||||||
xor %eax,%eax
|
xor %eax,%eax
|
||||||
cmp \$0x756e6547,%ebx # "Genu"
|
cmp \$0x756e6547,%ebx # "Genu"
|
||||||
setne %al
|
setne %al
|
||||||
@ -60,7 +62,6 @@ OPENSSL_ia32_cpuid:
|
|||||||
cmp \$0x6c65746e,%ecx # "ntel"
|
cmp \$0x6c65746e,%ecx # "ntel"
|
||||||
setne %al
|
setne %al
|
||||||
or %eax,%r9d # 0 indicates Intel CPU
|
or %eax,%r9d # 0 indicates Intel CPU
|
||||||
mov \$1,%r10d # "number of [AMD] cores"
|
|
||||||
jz .Lintel
|
jz .Lintel
|
||||||
|
|
||||||
cmp \$0x68747541,%ebx # "Auth"
|
cmp \$0x68747541,%ebx # "Auth"
|
||||||
@ -74,10 +75,10 @@ OPENSSL_ia32_cpuid:
|
|||||||
or %eax,%r10d # 0 indicates AMD CPU
|
or %eax,%r10d # 0 indicates AMD CPU
|
||||||
jnz .Lintel
|
jnz .Lintel
|
||||||
|
|
||||||
|
# AMD specific
|
||||||
mov \$0x80000000,%eax
|
mov \$0x80000000,%eax
|
||||||
cpuid
|
cpuid
|
||||||
cmp \$0x80000008,%eax
|
cmp \$0x80000008,%eax
|
||||||
mov \$1,%r10d # "number of [AMD] cores"
|
|
||||||
jb .Lintel
|
jb .Lintel
|
||||||
|
|
||||||
mov \$0x80000008,%eax
|
mov \$0x80000008,%eax
|
||||||
@ -85,7 +86,29 @@ OPENSSL_ia32_cpuid:
|
|||||||
movzb %cl,%r10 # number of cores - 1
|
movzb %cl,%r10 # number of cores - 1
|
||||||
inc %r10 # number of cores
|
inc %r10 # number of cores
|
||||||
|
|
||||||
|
mov \$1,%eax
|
||||||
|
cpuid
|
||||||
|
bt \$28,%edx # test hyper-threading bit
|
||||||
|
jnc .Ldone
|
||||||
|
shr \$16,%ebx # number of logical processors
|
||||||
|
cmp %r10b,%bl
|
||||||
|
ja .Ldone
|
||||||
|
and \$0xefffffff,%edx # ~(1<<28)
|
||||||
|
jmp .Ldone
|
||||||
|
|
||||||
.Lintel:
|
.Lintel:
|
||||||
|
cmp \$4,%r11d
|
||||||
|
mov \$-1,%r10d
|
||||||
|
jb .Lnocacheinfo
|
||||||
|
|
||||||
|
mov \$4,%eax
|
||||||
|
mov \$0,%ecx # query L1D
|
||||||
|
cpuid
|
||||||
|
mov %eax,%r10d
|
||||||
|
shr \$14,%r10d
|
||||||
|
and \$0xfff,%r10d # number of cores -1 per L1D
|
||||||
|
|
||||||
|
.Lnocacheinfo:
|
||||||
mov \$1,%eax
|
mov \$1,%eax
|
||||||
cpuid
|
cpuid
|
||||||
cmp \$0,%r9d
|
cmp \$0,%r9d
|
||||||
@ -98,8 +121,13 @@ OPENSSL_ia32_cpuid:
|
|||||||
.Lnotintel:
|
.Lnotintel:
|
||||||
bt \$28,%edx # test hyper-threading bit
|
bt \$28,%edx # test hyper-threading bit
|
||||||
jnc .Ldone
|
jnc .Ldone
|
||||||
|
and \$0xefffffff,%edx # ~(1<<28)
|
||||||
|
cmp \$0,%r10d
|
||||||
|
je .Ldone
|
||||||
|
|
||||||
|
or \$0x10000000,%edx # 1<<28
|
||||||
shr \$16,%ebx
|
shr \$16,%ebx
|
||||||
cmp %r10b,%bl # see if cache is shared
|
cmp \$1,%bl # see if cache is shared
|
||||||
ja .Ldone
|
ja .Ldone
|
||||||
and \$0xefffffff,%edx # ~(1<<28)
|
and \$0xefffffff,%edx # ~(1<<28)
|
||||||
.Ldone:
|
.Ldone:
|
||||||
|
@ -23,6 +23,8 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
|||||||
&jnc (&label("done"));
|
&jnc (&label("done"));
|
||||||
&xor ("eax","eax");
|
&xor ("eax","eax");
|
||||||
&cpuid ();
|
&cpuid ();
|
||||||
|
&mov ("edi","eax"); # max value for standard query level
|
||||||
|
|
||||||
&xor ("eax","eax");
|
&xor ("eax","eax");
|
||||||
&cmp ("ebx",0x756e6547); # "Genu"
|
&cmp ("ebx",0x756e6547); # "Genu"
|
||||||
&setne (&LB("eax"));
|
&setne (&LB("eax"));
|
||||||
@ -33,7 +35,6 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
|||||||
&cmp ("ecx",0x6c65746e); # "ntel"
|
&cmp ("ecx",0x6c65746e); # "ntel"
|
||||||
&setne (&LB("eax"));
|
&setne (&LB("eax"));
|
||||||
&or ("ebp","eax"); # 0 indicates Intel CPU
|
&or ("ebp","eax"); # 0 indicates Intel CPU
|
||||||
&mov ("esi",1); # "number of [AMD] cores"
|
|
||||||
&jz (&label("intel"));
|
&jz (&label("intel"));
|
||||||
|
|
||||||
&cmp ("ebx",0x68747541); # "Auth"
|
&cmp ("ebx",0x68747541); # "Auth"
|
||||||
@ -47,10 +48,10 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
|||||||
&or ("esi","eax"); # 0 indicates AMD CPU
|
&or ("esi","eax"); # 0 indicates AMD CPU
|
||||||
&jnz (&label("intel"));
|
&jnz (&label("intel"));
|
||||||
|
|
||||||
|
# AMD specific
|
||||||
&mov ("eax",0x80000000);
|
&mov ("eax",0x80000000);
|
||||||
&cpuid ();
|
&cpuid ();
|
||||||
&cmp ("eax",0x80000008);
|
&cmp ("eax",0x80000008);
|
||||||
&mov ("esi",1); # "number of [AMD] cores"
|
|
||||||
&jb (&label("intel"));
|
&jb (&label("intel"));
|
||||||
|
|
||||||
&mov ("eax",0x80000008);
|
&mov ("eax",0x80000008);
|
||||||
@ -58,7 +59,30 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
|||||||
&movz ("esi",&LB("ecx")); # number of cores - 1
|
&movz ("esi",&LB("ecx")); # number of cores - 1
|
||||||
&inc ("esi"); # number of cores
|
&inc ("esi"); # number of cores
|
||||||
|
|
||||||
|
&mov ("eax",1);
|
||||||
|
&cpuid ();
|
||||||
|
&bt ("edx",28);
|
||||||
|
&jnc (&label("done"));
|
||||||
|
&shr ("ebx",16);
|
||||||
|
&and ("ebx",0xff);
|
||||||
|
&cmp ("ebx","esi");
|
||||||
|
&ja (&label("done"));
|
||||||
|
&and ("edx",0xefffffff); # clear hyper-threading bit
|
||||||
|
&jmp (&label("done"));
|
||||||
|
|
||||||
&set_label("intel");
|
&set_label("intel");
|
||||||
|
&cmp ("edi",4);
|
||||||
|
&mov ("edi",-1);
|
||||||
|
&jb (&label("nocacheinfo"));
|
||||||
|
|
||||||
|
&mov ("eax",4);
|
||||||
|
&mov ("ecx",0); # query L1D
|
||||||
|
&cpuid ();
|
||||||
|
&mov ("edi","eax");
|
||||||
|
&shr ("edi",14);
|
||||||
|
&and ("edi",0xfff); # number of cores -1 per L1D
|
||||||
|
|
||||||
|
&set_label("nocacheinfo");
|
||||||
&mov ("eax",1);
|
&mov ("eax",1);
|
||||||
&cpuid ();
|
&cpuid ();
|
||||||
&cmp ("ebp",0);
|
&cmp ("ebp",0);
|
||||||
@ -70,17 +94,19 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
|
|||||||
&set_label("notP4");
|
&set_label("notP4");
|
||||||
&bt ("edx",28); # test hyper-threading bit
|
&bt ("edx",28); # test hyper-threading bit
|
||||||
&jnc (&label("done"));
|
&jnc (&label("done"));
|
||||||
|
&and ("edx",0xefffffff);
|
||||||
|
&cmp ("edi",0);
|
||||||
|
&je (&label("done"));
|
||||||
|
|
||||||
|
&or ("edx",0x10000000);
|
||||||
&shr ("ebx",16);
|
&shr ("ebx",16);
|
||||||
&and ("ebx",0xff);
|
&cmp (&LB("ebx"),1);
|
||||||
&cmp ("ebx","esi"); # see if cache is shared(*)
|
|
||||||
&ja (&label("done"));
|
&ja (&label("done"));
|
||||||
&and ("edx",0xefffffff); # clear hyper-threading bit if not
|
&and ("edx",0xefffffff); # clear hyper-threading bit if not
|
||||||
&set_label("done");
|
&set_label("done");
|
||||||
&mov ("eax","edx");
|
&mov ("eax","edx");
|
||||||
&mov ("edx","ecx");
|
&mov ("edx","ecx");
|
||||||
&function_end("OPENSSL_ia32_cpuid");
|
&function_end("OPENSSL_ia32_cpuid");
|
||||||
# (*) on Core2 this value is set to 2 denoting the fact that L2
|
|
||||||
# cache is shared between cores.
|
|
||||||
|
|
||||||
&external_label("OPENSSL_ia32cap_P");
|
&external_label("OPENSSL_ia32cap_P");
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user