Support for SPARC T4 MONT[MUL|SQR] instructions.

Submitted by: David Miller, Andy Polyakov
This commit is contained in:
Andy Polyakov 2012-11-17 10:34:11 +00:00
parent c7b7984ac9
commit 68c06bf6b2
7 changed files with 1386 additions and 10 deletions

View File

@ -130,7 +130,7 @@ my $x86_elf_asm="$x86_asm:elf";
my $x86_64_asm="x86_64cpuid.o:x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o::aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o::md5-x86_64.o:sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o::rc4-x86_64.o rc4-md5-x86_64.o:::wp-x86_64.o:cmll-x86_64.o cmll_misc.o:ghash-x86_64.o:e_padlock-x86_64.o";
my $ia64_asm="ia64cpuid.o:bn-ia64.o ia64-mont.o::aes_core.o aes_cbc.o aes-ia64.o::md5-ia64.o:sha1-ia64.o sha256-ia64.o sha512-ia64.o::rc4-ia64.o rc4_skey.o:::::ghash-ia64.o::void";
my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparcv9-gf2m.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o::::::camellia.o cmll_misc.o cmll_cbc.o cmllt4-sparcv9.o:ghash-sparcv9.o::void";
my $sparcv9_asm="sparcv9cap.o sparccpuid.o:bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o:des_enc-sparc.o fcrypt_b.o:aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o::md5-sparcv9.o:sha1-sparcv9.o sha256-sparcv9.o sha512-sparcv9.o::::::camellia.o cmll_misc.o cmll_cbc.o cmllt4-sparcv9.o:ghash-sparcv9.o::void";
my $sparcv8_asm=":sparcv8.o:des_enc-sparc.o fcrypt_b.o:::::::::::::void";
my $alpha_asm="alphacpuid.o:bn_asm.o alpha-mont.o:::::sha1-alpha.o:::::::ghash-alpha.o::void";
my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::";

51
TABLE
View File

@ -174,7 +174,7 @@ $sys_id =
$lflags =
$bn_ops = BN_LLONG RC2_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC2 BF_PTR
$cpuid_obj = sparcv9cap.o sparccpuid.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparcv9-gf2m.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o
$des_obj = des_enc-sparc.o fcrypt_b.o
$aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj =
@ -1782,6 +1782,39 @@ $ranlib =
$arflags =
$multilib =
*** debug-ben-debug-64
$cc = gcc
$cflags = -Wall -pedantic -DPEDANTIC -Wno-long-long -Wsign-compare -Wmissing-prototypes -Wshadow -Wformat -Werror -DCRYPTO_MDEBUG_ALL -DCRYPTO_MDEBUG_ABORT -DREF_CHECK -DOPENSSL_NO_DEPRECATED -DL_ENDIAN -DTERMIOS -g3 -O3
$unistd =
$thread_cflag = -pthread -D_THREAD_SAFE -D_REENTRANT
$sys_id =
$lflags =
$bn_ops = SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL
$cpuid_obj = x86_64cpuid.o
$bn_obj = x86_64-gcc.o x86_64-mont.o x86_64-mont5.o x86_64-gf2m.o modexp512-x86_64.o
$des_obj =
$aes_obj = aes-x86_64.o vpaes-x86_64.o bsaes-x86_64.o aesni-x86_64.o aesni-sha1-x86_64.o
$bf_obj =
$md5_obj = md5-x86_64.o
$sha1_obj = sha1-x86_64.o sha256-x86_64.o sha512-x86_64.o
$cast_obj =
$rc4_obj = rc4-x86_64.o rc4-md5-x86_64.o
$rmd160_obj =
$rc5_obj =
$wp_obj = wp-x86_64.o
$cmll_obj = cmll-x86_64.o cmll_misc.o
$modes_obj = ghash-x86_64.o
$engines_obj = e_padlock-x86_64.o
$perlasm_scheme = elf
$dso_scheme = dlfcn
$shared_target= bsd-gcc-shared
$shared_cflag = -fPIC
$shared_ldflag =
$shared_extension = .so.$(SHLIB_MAJOR).$(SHLIB_MINOR)
$ranlib =
$arflags =
$multilib =
*** debug-ben-macos
$cc = cc
$cflags = -Wall -pedantic -DPEDANTIC -Wno-long-long -Wsign-compare -Wmissing-prototypes -Wshadow -Wformat -Werror -DCRYPTO_MDEBUG_ALL -DCRYPTO_MDEBUG_ABORT -DREF_CHECK -DOPENSSL_NO_DEPRECATED -DOPENSSL_NO_ASM -DBN_DEBUG -DCONF_DEBUG -DDEBUG_SAFESTACK -DDEBUG_UNUSED -DOPENSSL_THREADS -D_REENTRANT -DDSO_DLFCN -DHAVE_DLFCN_H -arch i386 -O3 -DL_ENDIAN -g3 -pipe
@ -2616,7 +2649,7 @@ $sys_id = ULTRASPARC
$lflags = -lsocket -lnsl -ldl
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK_LL DES_PTR DES_RISC1 DES_UNROLL BF_PTR
$cpuid_obj = sparcv9cap.o sparccpuid.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparcv9-gf2m.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o
$des_obj = des_enc-sparc.o fcrypt_b.o
$aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj =
@ -2649,7 +2682,7 @@ $sys_id = ULTRASPARC
$lflags = -lsocket -lnsl -ldl
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR
$cpuid_obj = sparcv9cap.o sparccpuid.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparcv9-gf2m.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o
$des_obj = des_enc-sparc.o fcrypt_b.o
$aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj =
@ -4398,7 +4431,7 @@ $sys_id = ULTRASPARC
$lflags = -ldl
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR
$cpuid_obj = sparcv9cap.o sparccpuid.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparcv9-gf2m.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o
$des_obj = des_enc-sparc.o fcrypt_b.o
$aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj =
@ -4596,7 +4629,7 @@ $sys_id = ULTRASPARC
$lflags = -ldl
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR
$cpuid_obj = sparcv9cap.o sparccpuid.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparcv9-gf2m.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o
$des_obj = des_enc-sparc.o fcrypt_b.o
$aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj =
@ -5454,7 +5487,7 @@ $sys_id = ULTRASPARC
$lflags = -lsocket -lnsl -ldl
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK_LL DES_PTR DES_RISC1 DES_UNROLL BF_PTR
$cpuid_obj = sparcv9cap.o sparccpuid.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparcv9-gf2m.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o
$des_obj = des_enc-sparc.o fcrypt_b.o
$aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj =
@ -5487,7 +5520,7 @@ $sys_id = ULTRASPARC
$lflags = -lsocket -lnsl -ldl
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_UNROLL BF_PTR
$cpuid_obj = sparcv9cap.o sparccpuid.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparcv9-gf2m.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o
$des_obj = des_enc-sparc.o fcrypt_b.o
$aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj =
@ -5586,7 +5619,7 @@ $sys_id = ULTRASPARC
$lflags = -lsocket -lnsl -ldl
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR
$cpuid_obj = sparcv9cap.o sparccpuid.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparcv9-gf2m.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o
$des_obj = des_enc-sparc.o fcrypt_b.o
$aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj =
@ -5619,7 +5652,7 @@ $sys_id = ULTRASPARC
$lflags = -lsocket -lnsl -ldl
$bn_ops = BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_PTR DES_RISC1 DES_UNROLL BF_PTR
$cpuid_obj = sparcv9cap.o sparccpuid.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparcv9-gf2m.o
$bn_obj = bn-sparcv9.o sparcv9-mont.o sparcv9a-mont.o vis3-mont.o sparct4-mont.o sparcv9-gf2m.o
$des_obj = des_enc-sparc.o fcrypt_b.o
$aes_obj = aes_core.o aes_cbc.o aes-sparcv9.o aest4-sparcv9.o
$bf_obj =

View File

@ -79,6 +79,8 @@ sparcv9-mont.s: asm/sparcv9-mont.pl
$(PERL) asm/sparcv9-mont.pl $(CFLAGS) > $@
vis3-mont.s: asm/vis3-mont.pl
$(PERL) asm/vis3-mont.pl $(CFLAGS) > $@
sparct4-mont.S: asm/sparct4-mont.pl
$(PERL) asm/sparct4-mont.pl $(CFLAGS) > $@
sparcv9-gf2m.S: asm/sparcv9-gf2m.pl
$(PERL) asm/sparcv9-gf2m.pl $(CFLAGS) > $@

1201
crypto/bn/asm/sparct4-mont.pl Executable file

File diff suppressed because it is too large Load Diff

View File

@ -126,6 +126,11 @@
# endif
#endif
#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc__)
# include "sparc_arch.h"
extern unsigned int OPENSSL_sparcv9cap_P[];
#endif
/* maximum precomputation table size for *variable* sliding windows */
#define TABLE_SIZE 32
@ -588,6 +593,9 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
int powerbufLen = 0;
unsigned char *powerbuf=NULL;
BIGNUM tmp, am;
#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc__)
unsigned int t4=0;
#endif
bn_check_top(a);
bn_check_top(p);
@ -622,9 +630,18 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
/* Get the window size to use with size of p. */
window = BN_window_bits_for_ctime_exponent_size(bits);
#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc__)
if (window>=5 && (top&15)==0 && top<=64 &&
(OPENSSL_sparcv9cap_P[1]&(CFR_MONTMUL|CFR_MONTSQR))==
(CFR_MONTMUL|CFR_MONTSQR) &&
(t4=OPENSSL_sparcv9cap_P[0]))
window=5;
else
#endif
#if defined(OPENSSL_BN_ASM_MONT5)
if (window==6 && bits<=1024) window=5; /* ~5% improvement of 2048-bit RSA sign */
#endif
(void)0;
/* Allocate a buffer large enough to hold all of the pre-computed
* powers of am, am itself and tmp.
@ -674,6 +691,94 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
}
else if (!BN_to_montgomery(&am,a,mont,ctx)) goto err;
#if defined(OPENSSL_BN_ASM_MONT) && defined(__sparc__)
if (t4)
{
typedef int (*bn_pwr5_mont_f)(BN_ULONG *tp,const BN_ULONG *np,
const BN_ULONG *n0,const void *table,int power);
int bn_pwr5_mont_t4_8(BN_ULONG *tp,const BN_ULONG *np,
const BN_ULONG *n0,const void *table,int power);
int bn_pwr5_mont_t4_16(BN_ULONG *tp,const BN_ULONG *np,
const BN_ULONG *n0,const void *table,int power);
int bn_pwr5_mont_t4_24(BN_ULONG *tp,const BN_ULONG *np,
const BN_ULONG *n0,const void *table,int power);
int bn_pwr5_mont_t4_32(BN_ULONG *tp,const BN_ULONG *np,
const BN_ULONG *n0,const void *table,int power);
static const bn_pwr5_mont_f funcs[4] = {
bn_pwr5_mont_t4_8, bn_pwr5_mont_t4_16,
bn_pwr5_mont_t4_24, bn_pwr5_mont_t4_32 };
bn_pwr5_mont_f worker = funcs[top/16-1];
void bn_mul_mont_t4(BN_ULONG *rp,const BN_ULONG *ap,
const void *bp,const BN_ULONG *np,
const BN_ULONG *n0,int num);
void bn_mul_mont_gather5_t4(BN_ULONG *rp,const BN_ULONG *ap,
const void *table,const BN_ULONG *np,
const BN_ULONG *n0,int num,int power);
void bn_scatter5_t4(const BN_ULONG *inp,size_t num,
void *table,size_t power);
void bn_gather5_t4(BN_ULONG *out,size_t num,
void *table,size_t power);
void bn_flip_t4(BN_ULONG *dst,BN_ULONG *src,size_t num);
BN_ULONG *np=alloca(top*sizeof(BN_ULONG)), *n0=mont->n0;
/* BN_to_montgomery can contaminate words above .top
* [in BN_DEBUG[_DEBUG] build]... */
for (i=am.top; i<top; i++) am.d[i]=0;
for (i=tmp.top; i<top; i++) tmp.d[i]=0;
/* switch to 64-bit domain */
top /= 2;
bn_flip_t4(np,mont->N.d,top);
bn_flip_t4(tmp.d,tmp.d,top);
bn_flip_t4(am.d,am.d,top);
bn_scatter5_t4(tmp.d,top,powerbuf,0);
bn_scatter5_t4(am.d,top,powerbuf,1);
bn_mul_mont_t4(tmp.d,am.d,am.d,np,n0,top);
bn_scatter5_t4(tmp.d,top,powerbuf,2);
for (i=3; i<32; i++)
{
/* Calculate a^i = a^(i-1) * a */
bn_mul_mont_gather5_t4(tmp.d,am.d,powerbuf,np,n0,top,i-1);
bn_scatter5_t4(tmp.d,top,powerbuf,i);
}
bits--;
for (wvalue=0, i=bits%5; i>=0; i--,bits--)
wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
bn_gather5_t4(tmp.d,top,powerbuf,wvalue);
/* Scan the exponent one window at a time starting from the most
* significant bits.
*/
while (bits >= 0)
{
for (wvalue=0, i=0; i<5; i++,bits--)
wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
if ((*worker)(tmp.d,np,n0,powerbuf,wvalue)) continue;
/* retry once and fall back */
if ((*worker)(tmp.d,np,n0,powerbuf,wvalue)) continue;
bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top);
bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top);
bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top);
bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top);
bn_mul_mont_t4(tmp.d,tmp.d,tmp.d,np,n0,top);
bn_mul_mont_gather5_t4(tmp.d,tmp.d,powerbuf,np,n0,top,wvalue);
}
bn_flip_t4(tmp.d,tmp.d,top);
top *= 2;
/* back to 32-bit domain */
tmp.top=top;
bn_correct_top(&tmp);
OPENSSL_cleanse(np,top*sizeof(BN_ULONG));
}
else
#endif
#if defined(OPENSSL_BN_ASM_MONT5)
/* This optimization uses ideas from http://eprint.iacr.org/2011/239,
* specifically optimization of cache-timing attack countermeasures

View File

@ -9,6 +9,7 @@
#define SPARCV9_BLK (1<<5) /* VIS1 block copy */
#define SPARCV9_VIS3 (1<<6)
#define SPARCV9_RANDOM (1<<7)
#define SPARCV9_64BIT_STACK (1<<8)
/*
* OPENSSL_sparcv9cap_P[1] is copy of Compatibility Feature Register,
@ -65,6 +66,7 @@
# define SIZE_T_CC %xcc
# define STACK_FRAME 192
# define STACK_BIAS 2047
# define STACK_7thARG (STACK_BIAS+176)
#else
@ -74,6 +76,7 @@
# define SIZE_T_CC %icc
# define STACK_FRAME 112
# define STACK_BIAS 0
# define STACK_7thARG 92
# define SPARC_LOAD_ADDRESS_LEAF(SYM,reg,tmp) SPARC_LOAD_ADDRESS(SYM,reg)
#endif

View File

@ -4,6 +4,7 @@
#include <setjmp.h>
#include <signal.h>
#include <sys/time.h>
#include <unistd.h>
#include <openssl/bn.h>
#include "sparc_arch.h"
@ -21,6 +22,25 @@ int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_U
if (!(num&1) && num>=6)
{
if ((num&15)==0 && num<=64 &&
(OPENSSL_sparcv9cap_P[1]&(CFR_MONTMUL|CFR_MONTSQR))==
(CFR_MONTMUL|CFR_MONTSQR))
{
typedef int (*bn_mul_mont_f)(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0);
int bn_mul_mont_t4_8(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0);
int bn_mul_mont_t4_16(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0);
int bn_mul_mont_t4_24(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0);
int bn_mul_mont_t4_32(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np,const BN_ULONG *n0);
static const bn_mul_mont_f funcs[4] = {
bn_mul_mont_t4_8, bn_mul_mont_t4_16,
bn_mul_mont_t4_24, bn_mul_mont_t4_32 };
bn_mul_mont_f worker = funcs[num/16-1];
if ((*worker)(rp,ap,bp,np,n0)) return 1;
/* retry once and fall back */
if ((*worker)(rp,ap,bp,np,n0)) return 1;
return bn_mul_mont_vis3(rp,ap,bp,np,n0,num);
}
if ((OPENSSL_sparcv9cap_P[0]&SPARCV9_VIS3))
return bn_mul_mont_vis3(rp,ap,bp,np,n0,num);
else if (num>=8 &&
@ -289,6 +309,18 @@ void OPENSSL_cpuid_setup(void)
sigaction(SIGILL,&ill_oact,NULL);
sigprocmask(SIG_SETMASK,&oset,NULL);
if (sizeof(size_t)==8)
OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK;
#ifdef __linux
else
{
int ret = syscall(340);
if (ret>=0 && ret&1)
OPENSSL_sparcv9cap_P[0] |= SPARCV9_64BIT_STACK;
}
#endif
}
#endif