Add assembly CRYPTO_memcmp.

GH: #102

Reviewed-by: Richard Levitte <levitte@openssl.org>
This commit is contained in:
Andy Polyakov 2016-05-15 17:01:15 +02:00
parent 2e6d7799ff
commit e33826f01b
12 changed files with 318 additions and 11 deletions

View File

@ -134,6 +134,34 @@ OPENSSL_cleanse:
bne $17,.Little
.Ldone: ret ($26)
.end OPENSSL_cleanse
.globl CRYPTO_memcmp
.ent CRYPTO_memcmp
CRYPTO_memcmp: # compare two byte buffers without an early exit on mismatch
.frame $30,0,$26 # in: $16,$17 = buffer pointers, $18 = length in bytes
.prologue 0 # out: $0 = 0 if no byte differs (or length is 0), 1 otherwise
xor $0,$0,$0 # $0 = 0, accumulates byte differences
beq $18,.Lno_data # length is 0: return 0
xor $1,$1,$1
nop
.Loop_cmp:
ldq_u $2,0($16) # load the aligned quadword containing the current byte
subq $18,1,$18 # one byte fewer to go
ldq_u $3,0($17)
extbl $2,$16,$2 # pick the byte selected by the low bits of the pointer
lda $16,1($16) # advance first pointer
extbl $3,$17,$3
lda $17,1($17) # advance second pointer
xor $3,$2,$2 # non-zero iff the two bytes differ
or $2,$0,$0 # fold into the accumulator
bne $18,.Loop_cmp # loop runs once per byte, independent of the data
subq $31,$0,$0 # $31 reads as zero: $0 = -accumulator, negative iff a difference was seen
srl $0,63,$0 # keep only the sign bit: result is 0 or 1
.Lno_data:
ret ($26)
.end CRYPTO_memcmp
___
{
my ($out,$cnt,$max)=("\$16","\$17","\$18");

View File

@ -100,6 +100,26 @@ OPENSSL_cleanse:
cbnz x1,.Little // len!=0?
ret
.size OPENSSL_cleanse,.-OPENSSL_cleanse
.globl CRYPTO_memcmp
.type CRYPTO_memcmp,%function
.align 4
// CRYPTO_memcmp: compare two byte buffers without a data-dependent early exit.
// In:  x0, x1 = buffer pointers, x2 = length in bytes.
// Out: w0 = 0 if no byte differs (or length is 0), 1 otherwise.
CRYPTO_memcmp:
eor w3,w3,w3 // w3 = 0, accumulates byte differences
cbz x2,.Lno_data // len==0?
.Loop_cmp:
ldrb w4,[x0],#1 // load one byte from each buffer, post-incrementing the pointer
ldrb w5,[x1],#1
eor w4,w4,w5 // non-zero iff the two bytes differ
orr w3,w3,w4 // fold into the accumulator
subs x2,x2,#1
b.ne .Loop_cmp // one iteration per byte, independent of the data
.Lno_data:
neg w0,w3 // w3 is in 0..255, so -w3 has bit 31 set iff w3 is non-zero
lsr w0,w0,#31 // move that bit down: result is 0 or 1
ret
.size CRYPTO_memcmp,.-CRYPTO_memcmp
___
print $code;

View File

@ -105,6 +105,36 @@ OPENSSL_cleanse:
#endif
.size OPENSSL_cleanse,.-OPENSSL_cleanse
.global CRYPTO_memcmp
.type CRYPTO_memcmp,%function
.align 4
@ CRYPTO_memcmp: compare two byte buffers, reading every byte regardless
@ of where the first mismatch is.
@ In:  r0, r1 = buffer pointers, r2 = length in bytes.
@ Out: r0 = 0 if no byte differs (or length is 0), 1 otherwise.
CRYPTO_memcmp:
eor ip,ip,ip @ ip = 0, accumulates byte differences
cmp r2,#0
beq .Lno_data @ nothing to compare, result is 0
stmdb sp!,{r4,r5} @ r4 and r5 are used as scratch in the loop
.Loop_cmp:
ldrb r4,[r0],#1 @ load one byte from each buffer, post-incrementing the pointer
ldrb r5,[r1],#1
eor r4,r4,r5 @ non-zero iff the two bytes differ
orr ip,ip,r4 @ fold into the accumulator
subs r2,r2,#1
bne .Loop_cmp @ one iteration per byte, independent of the data
ldmia sp!,{r4,r5} @ restore the scratch registers
.Lno_data:
neg r0,ip @ ip is in 0..255, so -ip has bit 31 set iff ip is non-zero
mov r0,r0,lsr#31 @ move that bit down: result is 0 or 1
#if __ARM_ARCH__>=5
bx lr
#else
tst lr,#1 @ bit 0 of lr clear: plain return through pc
moveq pc,lr
.word 0xe12fff1e @ bx lr
#endif
.size CRYPTO_memcmp,.-CRYPTO_memcmp
#if __ARM_MAX_ARCH__>=7
.arch armv7-a
.fpu neon

View File

@ -18,6 +18,7 @@ $code.=<<___;
.if __TI_EABI__
.asg OPENSSL_rdtsc,_OPENSSL_rdtsc
.asg OPENSSL_cleanse,_OPENSSL_cleanse
.asg CRYPTO_memcmp,_CRYPTO_memcmp
.asg OPENSSL_atomic_add,_OPENSSL_atomic_add
.asg OPENSSL_wipe_cpu,_OPENSSL_wipe_cpu
.asg OPENSSL_instrument_bus,_OPENSSL_instrument_bus
@ -87,6 +88,29 @@ _OPENSSL_cleanse:
[A1] STB A2,*A4++[2]
.endasmfunc
.global _CRYPTO_memcmp
; CRYPTO_memcmp: OR together the XOR of every byte pair read through A4
; and B4, for A6 bytes, and leave 0 (no difference, or zero length) or 1
; in A4. RA is an alias defined outside this fragment, presumably the
; return-address register (TODO confirm).
_CRYPTO_memcmp:
.asmfunc
MV A6,B0 ; B0 = length, used as predicate and as loop count
[!B0] BNOP RA ; length is 0: branch to RA ...
||[!B0] ZERO A4 ; ... with result 0
[B0] MVC B0,ILC ; otherwise load the loop counter for SPLOOP
|| [B0] ZERO A0 ; A0 = 0, accumulates byte differences
NOP 4
SPLOOP 1
LDBU *A4++,A1 ; load one unsigned byte from each buffer, post-increment
|| LDBU *B4++,B1
NOP 4 ; wait for the loaded bytes
XOR.L B1,A1,A2 ; non-zero iff the two bytes differ
SPKERNEL 1,0
|| OR.S A2,A0,A0 ; fold into the accumulator
BNOP RA,3 ; branch to RA; the two instructions below still execute before it takes effect (assumed delay slots, TODO confirm)
ZERO.L A4 ; result = 0 ...
[A0] MVK 1,A4 ; ... unless a difference was accumulated, then 1
.endasmfunc
.global _OPENSSL_atomic_add
_OPENSSL_atomic_add:
.asmfunc

View File

@ -316,6 +316,7 @@ void OPENSSL_die(const char *message, const char *file, int line)
#endif
}
#if !defined(OPENSSL_CPUID_OBJ)
/* volatile unsigned char* pointers are there because
* 1. Accessing a variable declared volatile via a pointer
* that lacks a volatile qualifier causes undefined behavior.
@ -347,3 +348,4 @@ int CRYPTO_memcmp(const volatile void * volatile in_a,
return x;
}
#endif

View File

@ -2,6 +2,12 @@
// On Win64i compile with ias.exe.
.text
#if defined(_HPUX_SOURCE) && !defined(_LP64)
#define ADDP addp4
#else
#define ADDP add
#endif
.global OPENSSL_cpuid_setup#
.proc OPENSSL_cpuid_setup#
OPENSSL_cpuid_setup:
@ -131,9 +137,7 @@ OPENSSL_wipe_cpu:
.proc OPENSSL_cleanse#
OPENSSL_cleanse:
{ .mib; cmp.eq p6,p0=0,r33 // len==0
#if defined(_HPUX_SOURCE) && !defined(_LP64)
addp4 r32=0,r32
#endif
ADDP r32=0,r32
(p6) br.ret.spnt b0 };;
{ .mib; and r2=7,r32
cmp.leu p6,p0=15,r33 // len>=15
@ -166,14 +170,51 @@ OPENSSL_cleanse:
(p6) br.ret.sptk.many b0 };;
.endp OPENSSL_cleanse#
.global CRYPTO_memcmp#
.proc CRYPTO_memcmp#
.align 32
.skip 16
// CRYPTO_memcmp: compare two byte buffers with a software-pipelined loop
// that always processes every byte.
// In:  r32, r33 = buffer pointers, r34 = length in bytes.
// Out: r8 = 0 if no byte differs (or length is 0), 1 otherwise.
CRYPTO_memcmp:
.prologue
{ .mib; mov r8=0 // r8 = 0: accumulator and return value
cmp.eq p6,p0=0,r34 // len==0?
(p6) br.ret.spnt b0 };;
.save ar.pfs,r2
{ .mib; alloc r2=ar.pfs,3,5,0,8 // 3 inputs, 5 locals, no outputs, 8 rotating registers
.save ar.lc,r3
mov r3=ar.lc // preserve the loop counter
brp.loop.imp .Loop_cmp_ctop,.Loop_cmp_cend-16
}
{ .mib; sub r10=r34,r0,1 // r10 = len-1, the value loaded into ar.lc below
.save pr,r9
mov r9=pr };; // preserve predicates
{ .mii; ADDP r16=0,r32 // working copy of the first pointer (ADDP is addp4 on 32-bit HP-UX, add otherwise)
mov ar.lc=r10
mov ar.ec=4 } // four pipeline stages: p16 load, p17 wait, p18 xor, p19 or
{ .mib; ADDP r17=0,r33 // working copy of the second pointer
mov pr.rot=1<<16 };; // start with only stage predicate p16 set
.Loop_cmp_ctop:
{ .mib; (p16) ld1 r32=[r16],1 // stage 0: load byte from the first buffer
(p18) xor r34=r34,r38 } // stage 2: the loaded bytes have rotated to r34/r38
{ .mib; (p16) ld1 r36=[r17],1 // stage 0: load byte from the second buffer
(p19) or r8=r8,r35 // stage 3: fold the xor result (rotated to r35) into r8
br.ctop.sptk .Loop_cmp_ctop };;
.Loop_cmp_cend:
{ .mib; cmp.ne p6,p0=0,r8 // any difference accumulated?
mov ar.lc=r3 };; // restore the loop counter
{ .mib;
(p6) mov r8=1 // normalise a non-zero accumulator to 1
mov pr=r9,0x1ffff // restore predicates
br.ret.sptk.many b0 };;
.endp CRYPTO_memcmp#
.global OPENSSL_instrument_bus#
.proc OPENSSL_instrument_bus#
OPENSSL_instrument_bus:
{ .mmi; mov r2=r33
#if defined(_HPUX_SOURCE) && !defined(_LP64)
addp4 r32=0,r32
#endif
}
ADDP r32=0,r32 }
{ .mmi; mov r8=ar.itc;;
mov r10=r0
mov r9=r8 };;
@ -208,10 +249,7 @@ OPENSSL_instrument_bus:
.proc OPENSSL_instrument_bus2#
OPENSSL_instrument_bus2:
{ .mmi; mov r2=r33 // put aside cnt
#if defined(_HPUX_SOURCE) && !defined(_LP64)
addp4 r32=0,r32
#endif
}
ADDP r32=0,r32 }
{ .mmi; mov r8=ar.itc;;
mov r10=r0
mov r9=r8 };;

View File

@ -138,6 +138,37 @@ L\$done
___
}
{
# CRYPTO_memcmp for PA-RISC: compares two byte buffers without leaving the
# loop early on a mismatch.  The three arguments arrive in the registers
# named below; the result is produced in the register held by $rv (declared
# outside this block): 0 if no byte differs or the length is 0, 1 otherwise.
my ($in1,$in2,$len)=("%r26","%r25","%r24");

$code.=<<___;
	.EXPORT	CRYPTO_memcmp,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
	.ALIGN	8
CRYPTO_memcmp
	.PROC
	.CALLINFO	NO_CALLS
	.ENTRY
	cmpib,*=	0,$len,L\$no_data
	xor		$rv,$rv,$rv	; delay slot: clear the accumulator on both paths

L\$oop_cmp
	ldb		0($in1),%r19	; one byte from each buffer
	ldb		0($in2),%r20
	ldo		1($in1),$in1	; advance both pointers
	ldo		1($in2),$in2
	xor		%r19,%r20,%r29	; non-zero iff the two bytes differ
	addib,*<>	-1,$len,L\$oop_cmp
	or		%r29,$rv,$rv	; delay slot: fold into the accumulator

	sub		%r0,$rv,%r29	; accumulator is 0..255, so its negation is negative iff non-zero
	extru		%r29,0,1,$rv	; bit 0 is the sign bit here (bit 31 would be the LSB): result is 0 or 1
L\$no_data
	bv		($rp)
	.EXIT
	nop
	.PROCEND
___
}
{
my ($out,$cnt,$max)=("%r26","%r25","%r24");
my ($tick,$lasttick)=("%r23","%r22");
my ($diff,$lastdiff)=("%r21","%r20");

View File

@ -177,6 +177,32 @@ Laligned:
.byte 0,12,0x14,0,0,0,2,0
.long 0
.size .OPENSSL_cleanse,.-.OPENSSL_cleanse
.globl	.CRYPTO_memcmp
.align	4
# CRYPTO_memcmp: compare two byte buffers without an early exit on mismatch.
# In:  r3, r4 = buffer pointers, r5 = length in bytes.
# Out: r3 = 0 if no byte differs (or length is 0), 1 otherwise.
.CRYPTO_memcmp:
	$CMPLI	r5,0
	li	r0,0		# r0 = 0, accumulates byte differences
	beq	Lno_data	# length is 0: result is 0
	mtctr	r5		# loop count = length

Loop_cmp:
	lbz	r6,0(r3)	# one byte from each buffer
	addi	r3,r3,1
	lbz	r7,0(r4)
	addi	r4,r4,1
	xor	r6,r6,r7	# non-zero iff the two bytes differ
	or	r0,r0,r6	# fold into the accumulator
	bdnz	Loop_cmp	# one iteration per byte, independent of the data

Lno_data:
	li	r3,0
	sub	r3,r3,r0	# r3 = -accumulator; sign bit set iff accumulator is non-zero
	extrwi	r3,r3,1,0	# extract that sign bit: result is 0 or 1
	blr
	.long	0
	.byte	0,12,0x14,0,0,0,3,0	# same layout as the table after OPENSSL_cleanse; the 3 is presumably the argument count (TODO confirm)
	.long	0
.size	.CRYPTO_memcmp,.-.CRYPTO_memcmp
___
{
my ($out,$cnt,$max)=("r3","r4","r5");

View File

@ -125,6 +125,33 @@ OPENSSL_cleanse:
br %r14
.size OPENSSL_cleanse,.-OPENSSL_cleanse
.globl CRYPTO_memcmp
.type CRYPTO_memcmp,@function
.align 16
CRYPTO_memcmp: # compare two byte buffers; in: r2, r3 = pointers, r4 = length; out: r2 = 0 if equal, 1 if not
#if !defined(__s390x__) && !defined(__s390x)
llgfr %r4,%r4 # zero-extend the 32-bit length for the 64-bit instructions below
#endif
lghi %r5,0 # r5 = 0, accumulates byte differences
clgr %r4,%r5
je .Lno_data # length is 0: return 0
.Loop_cmp:
llc %r0,0(%r2) # one zero-extended byte from each buffer
la %r2,1(%r2)
llc %r1,0(%r3)
la %r3,1(%r3)
xr %r1,%r0 # non-zero iff the two bytes differ
or %r5,%r1 # fold into the accumulator
brctg %r4,.Loop_cmp # decrement length, loop until it reaches zero
lnr %r5,%r5 # force the accumulator negative unless it is zero
srl %r5,31 # keep the sign bit: result is 0 or 1
.Lno_data:
lgr %r2,%r5 # return value
br %r14
.size CRYPTO_memcmp,.-CRYPTO_memcmp
.globl OPENSSL_instrument_bus
.type OPENSSL_instrument_bus,@function
.align 16

View File

@ -440,6 +440,40 @@ OPENSSL_cleanse:
.type OPENSSL_cleanse,#function
.size OPENSSL_cleanse,.-OPENSSL_cleanse
.global CRYPTO_memcmp
.align 16
! CRYPTO_memcmp: compare two byte buffers without an early exit on mismatch.
! In:  %o0, %o1 = buffer pointers, %o2 = length in bytes.
! Out: %o0 = 0 if no byte differs (or length is 0), 1 otherwise.
CRYPTO_memcmp:
cmp %o2,0
#ifdef ABI64
beq,pn %xcc,.Lno_data
#else
beq .Lno_data
#endif
xor %g1,%g1,%g1 ! delay slot: accumulator = 0 on both paths
nop
.Loop_cmp:
ldub [%o0],%o3 ! one unsigned byte from each buffer
add %o0,1,%o0
ldub [%o1],%o4
add %o1,1,%o1
subcc %o2,1,%o2 ! decrement length and set the flags for the branch below
xor %o3,%o4,%o4 ! non-zero iff the two bytes differ
#ifdef ABI64
bnz %xcc,.Loop_cmp
#else
bnz .Loop_cmp
#endif
or %o4,%g1,%g1 ! delay slot: fold into the accumulator
sub %g0,%g1,%g1 ! accumulator is 0..255, so its negation has bit 31 set iff non-zero
srl %g1,31,%g1 ! keep that bit: result is 0 or 1
.Lno_data:
retl
mov %g1,%o0 ! delay slot: return value
.type CRYPTO_memcmp,#function
.size CRYPTO_memcmp,.-CRYPTO_memcmp
.global _sparcv9_vis1_instrument_bus
.align 8
_sparcv9_vis1_instrument_bus:

View File

@ -224,6 +224,28 @@ OPENSSL_cleanse:
jne .Little
ret
.size OPENSSL_cleanse,.-OPENSSL_cleanse
.globl CRYPTO_memcmp
.type CRYPTO_memcmp,\@abi-omnipotent
.align 16
# CRYPTO_memcmp: compare two byte buffers without an early exit on mismatch.
# The arg1/arg2/arg3 placeholders (defined outside this fragment) are the two
# buffer pointers and the length in bytes.
# Returns 0 in rax if no byte differs (or length is 0), 1 otherwise.
CRYPTO_memcmp:
xor %rax,%rax # rax = 0, accumulates byte differences in al
xor %r10,%r10
cmp \$0,$arg3
je .Lno_data # length is 0: return 0
.Loop_cmp:
mov ($arg1),%r10b # byte from the first buffer
lea 1($arg1),$arg1 # advance the pointer; lea leaves the flags untouched
xor ($arg2),%r10b # non-zero iff the two bytes differ
lea 1($arg2),$arg2
or %r10b,%al # fold into the accumulator
dec $arg3
jnz .Loop_cmp # one iteration per byte, independent of the data
neg %rax # rax is 0..255, so its negation has bit 63 set iff non-zero
shr \$63,%rax # keep that bit: result is 0 or 1
.Lno_data:
ret
.size CRYPTO_memcmp,.-CRYPTO_memcmp
___
print<<___ if (!$win64);

View File

@ -365,6 +365,31 @@ for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }
&ret ();
&function_end_B("OPENSSL_cleanse");
# CRYPTO_memcmp: emits a byte-wise comparison of two buffers that never
# leaves the loop early on a mismatch.  The three parameters fetched with
# wparam(0..2) are used as first pointer, second pointer and byte count.
# The emitted code returns 0 in eax if no byte differs (or the count is 0)
# and 1 otherwise.
&function_begin_B("CRYPTO_memcmp");
&push ("esi");				# esi and edi are used as the two cursors
&push ("edi");
&mov ("esi",&wparam(0));
&mov ("edi",&wparam(1));
&mov ("ecx",&wparam(2));
&xor ("eax","eax");			# eax = 0, al accumulates byte differences
&xor ("edx","edx");
&cmp ("ecx",0);
&je (&label("no_data"));		# count is 0: return 0
&set_label("loop");
&mov ("dl",&BP(0,"esi"));		# byte from the first buffer
&lea ("esi",&DWP(1,"esi"));		# advance; lea leaves the flags untouched
&xor ("dl",&BP(0,"edi"));		# non-zero iff the two bytes differ
&lea ("edi",&DWP(1,"edi"));
&or ("al","dl");			# fold into the accumulator
&dec ("ecx");
&jnz (&label("loop"));			# one iteration per byte, independent of the data
&neg ("eax");				# eax is 0..255, so its negation has bit 31 set iff non-zero
&shr ("eax",31);			# keep that bit: result is 0 or 1
&set_label("no_data");
&pop ("edi");
&pop ("esi");
&ret ();
&function_end_B("CRYPTO_memcmp");
{
my $lasttick = "esi";
my $lastdiff = "ebx";