From 4d524040bc81d2db46a5530ba10a98686ab1c3ca Mon Sep 17 00:00:00 2001 From: Andy Polyakov Date: Sat, 22 Oct 2005 17:57:18 +0000 Subject: [PATCH] Change bn_mul_mont declaration and BN_MONT_CTX. Update CHANGES. --- CHANGES | 6 +++++ crypto/bn/asm/sparcv9a-mont.pl | 4 +-- crypto/bn/asm/x86-mont.pl | 3 ++- crypto/bn/asm/x86_64-mont.pl | 4 ++- crypto/bn/bn.h | 4 +-- crypto/bn/bn_asm.c | 8 +++--- crypto/bn/bn_mont.c | 48 +++++++++++++++++++++++++++++----- 7 files changed, 62 insertions(+), 15 deletions(-) diff --git a/CHANGES b/CHANGES index 89fdaabc75..6c1a127ec9 100644 --- a/CHANGES +++ b/CHANGES @@ -4,6 +4,12 @@ Changes between 0.9.8a and 0.9.9 [xx XXX xxxx] + *) New candidate for BIGNUM assembler implementation, bn_mul_mont, + dedicated Montgomery multiplication procedure, is introduced. + BN_MONT_CTX is modified to allow bn_mul_mont to reach for higher + "64-bit" performance on certain 32-bit targets. + [Andy Polyakov] + *) New option SSL_OP_NO_COMP to disable use of compression selectively in SSL structures. New SSL ctrl to set maximum send fragment size. Save memory by seeting the I/O buffer sizes dynamically instead of diff --git a/crypto/bn/asm/sparcv9a-mont.pl b/crypto/bn/asm/sparcv9a-mont.pl index efdf03b5f0..3855295c85 100755 --- a/crypto/bn/asm/sparcv9a-mont.pl +++ b/crypto/bn/asm/sparcv9a-mont.pl @@ -70,7 +70,7 @@ $rp="%i0"; # BN_ULONG *rp, $ap="%i1"; # const BN_ULONG *ap, $bp="%i2"; # const BN_ULONG *bp, $np="%i3"; # const BN_ULONG *np, -$n0="%i4"; # BN_ULONG n0, +$n0="%i4"; # const BN_ULONG *n0, $num="%i5"; # int num); $tp="%l0"; @@ -125,7 +125,7 @@ $fname: sethi %hi(0xffff),$mask sll $num,3,$num ! num*=8 or $mask,%lo(0xffff),$mask - mov %i4,$n0 ! reassigned, remember? + ldx [%i4],$n0 ! reassigned, remember? add %sp,$bias,%o0 ! 
real top of stack sll $num,2,%o1 diff --git a/crypto/bn/asm/x86-mont.pl b/crypto/bn/asm/x86-mont.pl index e250e9907c..8d01b7a87f 100755 --- a/crypto/bn/asm/x86-mont.pl +++ b/crypto/bn/asm/x86-mont.pl @@ -66,7 +66,7 @@ if($sse2) { &mov ("ebx",&wparam(1)); # const BN_ULONG *ap &mov ("ecx",&wparam(2)); # const BN_ULONG *bp &mov ("edx",&wparam(3)); # const BN_ULONG *np - &mov ("esi",&wparam(4)); # BN_ULONG n0 + &mov ("esi",&wparam(4)); # const BN_ULONG *n0 &mov ($num,&wparam(5)); # int num &mov ("edi","esp"); # saved stack pointer! @@ -78,6 +78,7 @@ if($sse2) { &sub ($num,1); # num is restored to its original value # and will remain constant from now... + &mov ("esi",&DWP(0,"esi")); # pull n0[0] &mov ($_rp,"eax"); # ... save a copy of argument block &mov ($_ap,"ebx"); &mov ($_bp,"ecx"); diff --git a/crypto/bn/asm/x86_64-mont.pl b/crypto/bn/asm/x86_64-mont.pl index f3d112f8ff..e6943b5343 100755 --- a/crypto/bn/asm/x86_64-mont.pl +++ b/crypto/bn/asm/x86_64-mont.pl @@ -22,7 +22,7 @@ $rp="%rdi"; # BN_ULONG *rp, $ap="%rsi"; # const BN_ULONG *ap, $bp="%rdx"; # const BN_ULONG *bp, $np="%rcx"; # const BN_ULONG *np, -$n0="%r8"; # BN_ULONG n0, +$n0="%r8"; # const BN_ULONG *n0, $num="%r9"; # int num); $lo0="%r10"; $hi0="%r11"; @@ -55,6 +55,8 @@ bn_mul_mont: mov %rbp,8(%rsp,$num,8) # tp[num+1]=%rsp mov %rdx,$bp # $bp reassigned, remember? 
+ mov ($n0),$n0 # pull n0[0] value + xor $i,$i # i=0 xor $j,$j # j=0 diff --git a/crypto/bn/bn.h b/crypto/bn/bn.h index b0c8f09808..0b616a6142 100644 --- a/crypto/bn/bn.h +++ b/crypto/bn/bn.h @@ -295,7 +295,7 @@ struct bn_mont_ctx_st BIGNUM N; /* The modulus */ BIGNUM Ni; /* R*(1/R mod N) - N*Ni = 1 * (Ni is only stored for bignum algorithm) */ - BN_ULONG n0; /* least significant word of Ni */ + BN_ULONG n0[2];/* least significant word(s) of Ni */ int flags; }; @@ -729,7 +729,7 @@ int RAND_pseudo_bytes(unsigned char *buf,int num); bn_pollute(a); \ } -int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num); +int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0, int num); BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w); BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w); void bn_sqr_words(BN_ULONG *rp, const BN_ULONG *ap, int num); diff --git a/crypto/bn/bn_asm.c b/crypto/bn/bn_asm.c index cd50b182b7..acb9937504 100644 --- a/crypto/bn/bn_asm.c +++ b/crypto/bn/bn_asm.c @@ -842,9 +842,9 @@ void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) * versions. Assembler vs. assembler improvement coefficients can * [and are known to] differ and are to be documented elsewhere. 
*/ -int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,BN_ULONG n0, int num) +int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0p, int num) { - BN_ULONG c0,c1,ml,*tp; + BN_ULONG c0,c1,ml,*tp,n0; #ifdef mul64 BN_ULONG mh; #endif @@ -852,10 +852,12 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U int i=0,j; #if 0 /* template for platform-specific implementation */ - if (ap==bp) return bn_sqr_mont(rp,ap,np,n0,num); + if (ap==bp) return bn_sqr_mont(rp,ap,np,n0p,num); #endif vp = tp = alloca((num+2)*sizeof(BN_ULONG)); + n0 = *n0p; + tp[num] = bn_mul_words(tp,ap,num,bp[0]); tp[num+1] = 0; goto enter; diff --git a/crypto/bn/bn_mont.c b/crypto/bn/bn_mont.c index 3a087fdce9..f035c18973 100644 --- a/crypto/bn/bn_mont.c +++ b/crypto/bn/bn_mont.c @@ -152,7 +152,7 @@ static int BN_from_montgomery_word(BIGNUM *ret, BIGNUM *r, BN_MONT_CTX *mont) #endif r->top=max; - n0=mont->n0; + n0=mont->n0[0]; #ifdef BN_COUNT fprintf(stderr,"word BN_from_montgomery_word %d * %d\n",nl,nl); @@ -323,16 +323,49 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) BIGNUM tmod; BN_ULONG buf[2]; + tmod.d=buf; + tmod.dmax=2; + tmod.neg=0; + mont->ri=(BN_num_bits(mod)+(BN_BITS2-1))/BN_BITS2*BN_BITS2; + +#if defined(OPENSSL_BN_ASM_MONT) && (BN_BITS2<=32) + BN_zero(R); + if (!(BN_set_bit(R,2*BN_BITS2))) goto err; + + tmod.top=0; + if (buf[0] = mod->d[0]) tmod.top=1; + if (buf[1] = mod->top>1 ? 
mod->d[1] : 0) tmod.top=2; + + if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL) + goto err; + if (!BN_lshift(Ri,Ri,2*BN_BITS2)) goto err; /* R*Ri */ + if (!BN_is_zero(Ri)) + { + if (!BN_sub_word(Ri,1)) goto err; + } + else /* if N mod double word size == 1 */ + { + if (bn_expand(Ri,(int)sizeof(BN_ULONG)*2) == NULL) + goto err; + /* Ri-- (mod double word size) */ + Ri->neg=0; + Ri->d[0]=BN_MASK2; + Ri->d[1]=BN_MASK2; + Ri->top=2; + } + if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err; + /* Ni = (R*Ri-1)/N, + * keep only a couple of least significant words: */ + mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0; + mont->n0[1] = (Ri->top > 1) ? Ri->d[1] : 0; +#else BN_zero(R); if (!(BN_set_bit(R,BN_BITS2))) goto err; /* R */ buf[0]=mod->d[0]; /* tmod = N mod word size */ buf[1]=0; - tmod.d=buf; tmod.top = buf[0] != 0 ? 1 : 0; - tmod.dmax=2; - tmod.neg=0; /* Ri = R^-1 mod N*/ if ((BN_mod_inverse(Ri,R,&tmod,ctx)) == NULL) goto err; @@ -348,7 +381,9 @@ int BN_MONT_CTX_set(BN_MONT_CTX *mont, const BIGNUM *mod, BN_CTX *ctx) if (!BN_div(Ri,NULL,Ri,&tmod,ctx)) goto err; /* Ni = (R*Ri-1)/N, * keep only least significant word: */ - mont->n0 = (Ri->top > 0) ? Ri->d[0] : 0; + mont->n0[0] = (Ri->top > 0) ? Ri->d[0] : 0; + mont->n0[1] = 0; +#endif } #else /* !MONT_WORD */ { /* bignum version */ @@ -384,7 +419,8 @@ BN_MONT_CTX *BN_MONT_CTX_copy(BN_MONT_CTX *to, BN_MONT_CTX *from) if (!BN_copy(&(to->N),&(from->N))) return NULL; if (!BN_copy(&(to->Ni),&(from->Ni))) return NULL; to->ri=from->ri; - to->n0=from->n0; + to->n0[0]=from->n0[0]; + to->n0[1]=from->n0[1]; return(to); }