Add support for sparc cryptographic hash opcodes.

* crypt/Makefile: Move test targets after toplevel Rules
	inclusion.  Grab any necessary sysdep routines when linking.
	* crypt/md5.c (md5_process_block): Remove define, we will always
	name it __md5_process_block.
	(md5_finish_ctx): Update md5_process_block call.
	(md5_stream): Likewise.
	(md5_process_bytes): Likewise.
	(md5_process_block): Rename to __md5_process_block and move to ...
	* crypt/md5-block.c: ... here.
	* crypt/sha256.c (sha256_process_block): Move to ...
	* crypt/sha256-block.c: ... here.
	* crypt/sha512.c (sha512_process_block): Move to ...
	* crypt/sha512-block.c: ... here.
	* locale/Makefile (CFLAGS-md5.c): Define to add crypt/ to include
	path.
	* sysdeps/sparc/sparc-ifunc.c (sparc_libc_ifunc): Define.
	* sysdeps/sparc/sparc64/multiarch/Makefile
	(libcrypt-sysdep_routines): Add crypto assembler sysdeps when in
	crypt subdir.
	(localedef-aux): Add md5 crypto assembler when in locale subdir.
	* sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile: Mirror sparc64
	multiarch changes.
	* sysdeps/sparc/sparc64/multiarch/md5-block.c: New file.
	* sysdeps/sparc/sparc64/multiarch/md5-crop.S: New file.
	* sysdeps/sparc/sparc64/multiarch/sha256-block.c: New file.
	* sysdeps/sparc/sparc64/multiarch/sha256-crop.S: New file.
	* sysdeps/sparc/sparc64/multiarch/sha512-block.c: New file.
	* sysdeps/sparc/sparc64/multiarch/sha512-crop.S: New file.
	* sysdeps/sparc/sparc32/sparcv9/multiarch/md5-block.c: New file.
	* sysdeps/sparc/sparc32/sparcv9/multiarch/md5-crop.S: New file.
	* sysdeps/sparc/sparc32/sparcv9/multiarch/sha256-block.c: New
	file.
	* sysdeps/sparc/sparc32/sparcv9/multiarch/sha256-crop.S: New file.
	* sysdeps/sparc/sparc32/sparcv9/multiarch/sha512-block.c: New
	file.
	* sysdeps/sparc/sparc32/sparcv9/multiarch/sha512-crop.S: New file.
This commit is contained in:
David S. Miller 2012-11-12 15:17:31 -08:00
parent e27d476a65
commit 3a0d900a99
24 changed files with 887 additions and 385 deletions

View File

@ -1,3 +1,42 @@
2012-11-13 David S. Miller <davem@davemloft.net>
* crypt/Makefile: Move test targets after toplevel Rules
inclusion. Grab any necessary sysdep routines when linking.
* crypt/md5.c (md5_process_block): Remove define, we will always
name it __md5_process_block.
(md5_finish_ctx): Update md5_process_block call.
(md5_stream): Likewise.
(md5_process_bytes): Likewise.
(md5_process_block): Rename to __md5_process_block and move to ...
* crypt/md5-block.c: ... here.
* crypt/sha256.c (sha256_process_block): Move to ...
* crypt/sha256-block.c: ... here.
* crypt/sha512.c (sha512_process_block): Move to ...
* crypt/sha512-block.c: ... here.
* locale/Makefile (CFLAGS-md5.c): Define to add crypt/ to include
path.
* sysdeps/sparc/sparc-ifunc.c (sparc_libc_ifunc): Define.
* sysdeps/sparc/sparc64/multiarch/Makefile
(libcrypt-sysdep_routines): Add crypto assembler sysdeps when in
crypt subdir.
(localedef-aux): Add md5 crypto assembler when in locale subdir.
* sysdeps/sparc/sparc32/sparcv9/multiarch/Makefile: Mirror sparc64
multiarch changes.
* sysdeps/sparc/sparc64/multiarch/md5-block.c: New file.
* sysdeps/sparc/sparc64/multiarch/md5-crop.S: New file.
* sysdeps/sparc/sparc64/multiarch/sha256-block.c: New file.
* sysdeps/sparc/sparc64/multiarch/sha256-crop.S: New file.
* sysdeps/sparc/sparc64/multiarch/sha512-block.c: New file.
* sysdeps/sparc/sparc64/multiarch/sha512-crop.S: New file.
* sysdeps/sparc/sparc32/sparcv9/multiarch/md5-block.c: New file.
* sysdeps/sparc/sparc32/sparcv9/multiarch/md5-crop.S: New file.
* sysdeps/sparc/sparc32/sparcv9/multiarch/sha256-block.c: New
file.
* sysdeps/sparc/sparc32/sparcv9/multiarch/sha256-crop.S: New file.
* sysdeps/sparc/sparc32/sparcv9/multiarch/sha512-block.c: New
file.
* sysdeps/sparc/sparc32/sparcv9/multiarch/sha512-crop.S: New file.
2012-11-13 Joseph Myers <joseph@codesourcery.com> 2012-11-13 Joseph Myers <joseph@codesourcery.com>
* timezone/tzselect.ksh: Update from tzcode git revision * timezone/tzselect.ksh: Update from tzcode git revision

View File

@ -49,15 +49,21 @@ tests += md5test sha256test sha512test
# The test md5test-giant uses up to 400 MB of RSS and runs on a fast # The test md5test-giant uses up to 400 MB of RSS and runs on a fast
# machine over a minute. # machine over a minute.
xtests = md5test-giant xtests = md5test-giant
$(objpfx)md5test: $(objpfx)md5.o
$(objpfx)md5test-giant: $(objpfx)md5.o
$(objpfx)sha256test: $(objpfx)sha256.o
$(objpfx)sha512test: $(objpfx)sha512.o
endif endif
include ../Rules include ../Rules
ifneq ($(nss-crypt),yes)
md5-routines := md5 $(filter md5%,$(libcrypt-sysdep_routines))
sha256-routines := sha256 $(filter sha256%,$(libcrypt-sysdep_routines))
sha512-routines := sha512 $(filter sha512%,$(libcrypt-sysdep_routines))
$(objpfx)md5test: $(patsubst %, $(objpfx)%.o,$(md5-routines))
$(objpfx)md5test-giant: $(patsubst %, $(objpfx)%.o,$(md5-routines))
$(objpfx)sha256test: $(patsubst %, $(objpfx)%.o,$(sha256-routines))
$(objpfx)sha512test: $(patsubst %, $(objpfx)%.o,$(sha512-routines))
endif
ifeq (yes,$(build-shared)) ifeq (yes,$(build-shared))
$(addprefix $(objpfx),$(tests)): $(objpfx)libcrypt.so $(addprefix $(objpfx),$(tests)): $(objpfx)libcrypt.so
else else

166
crypt/md5-block.c Normal file
View File

@ -0,0 +1,166 @@
/* These are the four functions used in the four steps of the MD5 algorithm
and defined in the RFC 1321. The first function is a little bit optimized
(as found in Colin Plumbs public domain implementation). */
/* #define FF(b, c, d) ((b & c) | (~b & d)) */
#define FF(b, c, d) (d ^ (b & (c ^ d)))
#define FG(b, c, d) FF (d, b, c)
#define FH(b, c, d) (b ^ c ^ d)
#define FI(b, c, d) (c ^ (b | ~d))
/* Process LEN bytes of BUFFER, accumulating context into CTX.
It is assumed that LEN % 64 == 0. */
void
__md5_process_block (const void *buffer, size_t len, struct md5_ctx *ctx)
{
md5_uint32 correct_words[16];
const md5_uint32 *words = buffer;
size_t nwords = len / sizeof (md5_uint32);
const md5_uint32 *endp = words + nwords;
md5_uint32 A = ctx->A;
md5_uint32 B = ctx->B;
md5_uint32 C = ctx->C;
md5_uint32 D = ctx->D;
md5_uint32 lolen = len;
/* First increment the byte count. RFC 1321 specifies the possible
length of the file up to 2^64 bits. Here we only compute the
number of bytes. Do a double word increment. */
ctx->total[0] += lolen;
ctx->total[1] += (len >> 31 >> 1) + (ctx->total[0] < lolen);
/* Process all bytes in the buffer with 64 bytes in each round of
the loop. */
while (words < endp)
{
md5_uint32 *cwp = correct_words;
md5_uint32 A_save = A;
md5_uint32 B_save = B;
md5_uint32 C_save = C;
md5_uint32 D_save = D;
/* First round: using the given function, the context and a constant
the next context is computed. Because the algorithms processing
unit is a 32-bit word and it is determined to work on words in
little endian byte order we perhaps have to change the byte order
before the computation. To reduce the work for the next steps
we store the swapped words in the array CORRECT_WORDS. */
#define OP(a, b, c, d, s, T) \
do \
{ \
a += FF (b, c, d) + (*cwp++ = SWAP (*words)) + T; \
++words; \
CYCLIC (a, s); \
a += b; \
} \
while (0)
/* It is unfortunate that C does not provide an operator for
cyclic rotation. Hope the C compiler is smart enough. */
#define CYCLIC(w, s) (w = (w << s) | (w >> (32 - s)))
/* Before we start, one word to the strange constants.
They are defined in RFC 1321 as
T[i] = (int) (4294967296.0 * fabs (sin (i))), i=1..64
*/
/* Round 1. */
OP (A, B, C, D, 7, 0xd76aa478);
OP (D, A, B, C, 12, 0xe8c7b756);
OP (C, D, A, B, 17, 0x242070db);
OP (B, C, D, A, 22, 0xc1bdceee);
OP (A, B, C, D, 7, 0xf57c0faf);
OP (D, A, B, C, 12, 0x4787c62a);
OP (C, D, A, B, 17, 0xa8304613);
OP (B, C, D, A, 22, 0xfd469501);
OP (A, B, C, D, 7, 0x698098d8);
OP (D, A, B, C, 12, 0x8b44f7af);
OP (C, D, A, B, 17, 0xffff5bb1);
OP (B, C, D, A, 22, 0x895cd7be);
OP (A, B, C, D, 7, 0x6b901122);
OP (D, A, B, C, 12, 0xfd987193);
OP (C, D, A, B, 17, 0xa679438e);
OP (B, C, D, A, 22, 0x49b40821);
/* For the second to fourth round we have the possibly swapped words
in CORRECT_WORDS. Redefine the macro to take an additional first
argument specifying the function to use. */
#undef OP
#define OP(f, a, b, c, d, k, s, T) \
do \
{ \
a += f (b, c, d) + correct_words[k] + T; \
CYCLIC (a, s); \
a += b; \
} \
while (0)
/* Round 2. */
OP (FG, A, B, C, D, 1, 5, 0xf61e2562);
OP (FG, D, A, B, C, 6, 9, 0xc040b340);
OP (FG, C, D, A, B, 11, 14, 0x265e5a51);
OP (FG, B, C, D, A, 0, 20, 0xe9b6c7aa);
OP (FG, A, B, C, D, 5, 5, 0xd62f105d);
OP (FG, D, A, B, C, 10, 9, 0x02441453);
OP (FG, C, D, A, B, 15, 14, 0xd8a1e681);
OP (FG, B, C, D, A, 4, 20, 0xe7d3fbc8);
OP (FG, A, B, C, D, 9, 5, 0x21e1cde6);
OP (FG, D, A, B, C, 14, 9, 0xc33707d6);
OP (FG, C, D, A, B, 3, 14, 0xf4d50d87);
OP (FG, B, C, D, A, 8, 20, 0x455a14ed);
OP (FG, A, B, C, D, 13, 5, 0xa9e3e905);
OP (FG, D, A, B, C, 2, 9, 0xfcefa3f8);
OP (FG, C, D, A, B, 7, 14, 0x676f02d9);
OP (FG, B, C, D, A, 12, 20, 0x8d2a4c8a);
/* Round 3. */
OP (FH, A, B, C, D, 5, 4, 0xfffa3942);
OP (FH, D, A, B, C, 8, 11, 0x8771f681);
OP (FH, C, D, A, B, 11, 16, 0x6d9d6122);
OP (FH, B, C, D, A, 14, 23, 0xfde5380c);
OP (FH, A, B, C, D, 1, 4, 0xa4beea44);
OP (FH, D, A, B, C, 4, 11, 0x4bdecfa9);
OP (FH, C, D, A, B, 7, 16, 0xf6bb4b60);
OP (FH, B, C, D, A, 10, 23, 0xbebfbc70);
OP (FH, A, B, C, D, 13, 4, 0x289b7ec6);
OP (FH, D, A, B, C, 0, 11, 0xeaa127fa);
OP (FH, C, D, A, B, 3, 16, 0xd4ef3085);
OP (FH, B, C, D, A, 6, 23, 0x04881d05);
OP (FH, A, B, C, D, 9, 4, 0xd9d4d039);
OP (FH, D, A, B, C, 12, 11, 0xe6db99e5);
OP (FH, C, D, A, B, 15, 16, 0x1fa27cf8);
OP (FH, B, C, D, A, 2, 23, 0xc4ac5665);
/* Round 4. */
OP (FI, A, B, C, D, 0, 6, 0xf4292244);
OP (FI, D, A, B, C, 7, 10, 0x432aff97);
OP (FI, C, D, A, B, 14, 15, 0xab9423a7);
OP (FI, B, C, D, A, 5, 21, 0xfc93a039);
OP (FI, A, B, C, D, 12, 6, 0x655b59c3);
OP (FI, D, A, B, C, 3, 10, 0x8f0ccc92);
OP (FI, C, D, A, B, 10, 15, 0xffeff47d);
OP (FI, B, C, D, A, 1, 21, 0x85845dd1);
OP (FI, A, B, C, D, 8, 6, 0x6fa87e4f);
OP (FI, D, A, B, C, 15, 10, 0xfe2ce6e0);
OP (FI, C, D, A, B, 6, 15, 0xa3014314);
OP (FI, B, C, D, A, 13, 21, 0x4e0811a1);
OP (FI, A, B, C, D, 4, 6, 0xf7537e82);
OP (FI, D, A, B, C, 11, 10, 0xbd3af235);
OP (FI, C, D, A, B, 2, 15, 0x2ad7d2bb);
OP (FI, B, C, D, A, 9, 21, 0xeb86d391);
/* Add the starting values of the context. */
A += A_save;
B += B_save;
C += C_save;
D += D_save;
}
/* Put checksum in context given as argument. */
ctx->A = A;
ctx->B = B;
ctx->C = C;
ctx->D = D;
}

View File

@ -44,7 +44,6 @@
/* We need to keep the namespace clean so define the MD5 function /* We need to keep the namespace clean so define the MD5 function
protected using leading __ . */ protected using leading __ . */
# define md5_init_ctx __md5_init_ctx # define md5_init_ctx __md5_init_ctx
# define md5_process_block __md5_process_block
# define md5_process_bytes __md5_process_bytes # define md5_process_bytes __md5_process_bytes
# define md5_finish_ctx __md5_finish_ctx # define md5_finish_ctx __md5_finish_ctx
# define md5_read_ctx __md5_read_ctx # define md5_read_ctx __md5_read_ctx
@ -126,7 +125,7 @@ md5_finish_ctx (ctx, resbuf)
(ctx->total[0] >> 29)); (ctx->total[0] >> 29));
/* Process last bytes. */ /* Process last bytes. */
md5_process_block (ctx->buffer, bytes + pad + 8, ctx); __md5_process_block (ctx->buffer, bytes + pad + 8, ctx);
return md5_read_ctx (ctx, resbuf); return md5_read_ctx (ctx, resbuf);
} }
@ -175,7 +174,7 @@ md5_stream (stream, resblock)
/* Process buffer with BLOCKSIZE bytes. Note that /* Process buffer with BLOCKSIZE bytes. Note that
BLOCKSIZE % 64 == 0 BLOCKSIZE % 64 == 0
*/ */
md5_process_block (buffer, BLOCKSIZE, &ctx); __md5_process_block (buffer, BLOCKSIZE, &ctx);
} }
/* Add the last bytes if necessary. */ /* Add the last bytes if necessary. */
@ -228,7 +227,7 @@ md5_process_bytes (buffer, len, ctx)
if (ctx->buflen > 64) if (ctx->buflen > 64)
{ {
md5_process_block (ctx->buffer, ctx->buflen & ~63, ctx); __md5_process_block (ctx->buffer, ctx->buflen & ~63, ctx);
ctx->buflen &= 63; ctx->buflen &= 63;
/* The regions in the following copy operation cannot overlap. */ /* The regions in the following copy operation cannot overlap. */
@ -254,14 +253,14 @@ md5_process_bytes (buffer, len, ctx)
if (UNALIGNED_P (buffer)) if (UNALIGNED_P (buffer))
while (len > 64) while (len > 64)
{ {
md5_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx); __md5_process_block (memcpy (ctx->buffer, buffer, 64), 64, ctx);
buffer = (const char *) buffer + 64; buffer = (const char *) buffer + 64;
len -= 64; len -= 64;
} }
else else
#endif #endif
{ {
md5_process_block (buffer, len & ~63, ctx); __md5_process_block (buffer, len & ~63, ctx);
buffer = (const char *) buffer + (len & ~63); buffer = (const char *) buffer + (len & ~63);
len &= 63; len &= 63;
} }
@ -276,7 +275,7 @@ md5_process_bytes (buffer, len, ctx)
left_over += len; left_over += len;
if (left_over >= 64) if (left_over >= 64)
{ {
md5_process_block (ctx->buffer, 64, ctx); __md5_process_block (ctx->buffer, 64, ctx);
left_over -= 64; left_over -= 64;
memcpy (ctx->buffer, &ctx->buffer[64], left_over); memcpy (ctx->buffer, &ctx->buffer[64], left_over);
} }
@ -284,173 +283,4 @@ md5_process_bytes (buffer, len, ctx)
} }
} }
#include <md5-block.c>
/* These are the four functions used in the four steps of the MD5 algorithm
and defined in the RFC 1321. The first function is a little bit optimized
(as found in Colin Plumbs public domain implementation). */
/* #define FF(b, c, d) ((b & c) | (~b & d)) */
#define FF(b, c, d) (d ^ (b & (c ^ d)))
#define FG(b, c, d) FF (d, b, c)
#define FH(b, c, d) (b ^ c ^ d)
#define FI(b, c, d) (c ^ (b | ~d))
/* Process LEN bytes of BUFFER, accumulating context into CTX.
It is assumed that LEN % 64 == 0. */
void
md5_process_block (buffer, len, ctx)
const void *buffer;
size_t len;
struct md5_ctx *ctx;
{
md5_uint32 correct_words[16];
const md5_uint32 *words = buffer;
size_t nwords = len / sizeof (md5_uint32);
const md5_uint32 *endp = words + nwords;
md5_uint32 A = ctx->A;
md5_uint32 B = ctx->B;
md5_uint32 C = ctx->C;
md5_uint32 D = ctx->D;
md5_uint32 lolen = len;
/* First increment the byte count. RFC 1321 specifies the possible
length of the file up to 2^64 bits. Here we only compute the
number of bytes. Do a double word increment. */
ctx->total[0] += lolen;
ctx->total[1] += (len >> 31 >> 1) + (ctx->total[0] < lolen);
/* Process all bytes in the buffer with 64 bytes in each round of
the loop. */
while (words < endp)
{
md5_uint32 *cwp = correct_words;
md5_uint32 A_save = A;
md5_uint32 B_save = B;
md5_uint32 C_save = C;
md5_uint32 D_save = D;
/* First round: using the given function, the context and a constant
the next context is computed. Because the algorithms processing
unit is a 32-bit word and it is determined to work on words in
little endian byte order we perhaps have to change the byte order
before the computation. To reduce the work for the next steps
we store the swapped words in the array CORRECT_WORDS. */
#define OP(a, b, c, d, s, T) \
do \
{ \
a += FF (b, c, d) + (*cwp++ = SWAP (*words)) + T; \
++words; \
CYCLIC (a, s); \
a += b; \
} \
while (0)
/* It is unfortunate that C does not provide an operator for
cyclic rotation. Hope the C compiler is smart enough. */
#define CYCLIC(w, s) (w = (w << s) | (w >> (32 - s)))
/* Before we start, one word to the strange constants.
They are defined in RFC 1321 as
T[i] = (int) (4294967296.0 * fabs (sin (i))), i=1..64
*/
/* Round 1. */
OP (A, B, C, D, 7, 0xd76aa478);
OP (D, A, B, C, 12, 0xe8c7b756);
OP (C, D, A, B, 17, 0x242070db);
OP (B, C, D, A, 22, 0xc1bdceee);
OP (A, B, C, D, 7, 0xf57c0faf);
OP (D, A, B, C, 12, 0x4787c62a);
OP (C, D, A, B, 17, 0xa8304613);
OP (B, C, D, A, 22, 0xfd469501);
OP (A, B, C, D, 7, 0x698098d8);
OP (D, A, B, C, 12, 0x8b44f7af);
OP (C, D, A, B, 17, 0xffff5bb1);
OP (B, C, D, A, 22, 0x895cd7be);
OP (A, B, C, D, 7, 0x6b901122);
OP (D, A, B, C, 12, 0xfd987193);
OP (C, D, A, B, 17, 0xa679438e);
OP (B, C, D, A, 22, 0x49b40821);
/* For the second to fourth round we have the possibly swapped words
in CORRECT_WORDS. Redefine the macro to take an additional first
argument specifying the function to use. */
#undef OP
#define OP(f, a, b, c, d, k, s, T) \
do \
{ \
a += f (b, c, d) + correct_words[k] + T; \
CYCLIC (a, s); \
a += b; \
} \
while (0)
/* Round 2. */
OP (FG, A, B, C, D, 1, 5, 0xf61e2562);
OP (FG, D, A, B, C, 6, 9, 0xc040b340);
OP (FG, C, D, A, B, 11, 14, 0x265e5a51);
OP (FG, B, C, D, A, 0, 20, 0xe9b6c7aa);
OP (FG, A, B, C, D, 5, 5, 0xd62f105d);
OP (FG, D, A, B, C, 10, 9, 0x02441453);
OP (FG, C, D, A, B, 15, 14, 0xd8a1e681);
OP (FG, B, C, D, A, 4, 20, 0xe7d3fbc8);
OP (FG, A, B, C, D, 9, 5, 0x21e1cde6);
OP (FG, D, A, B, C, 14, 9, 0xc33707d6);
OP (FG, C, D, A, B, 3, 14, 0xf4d50d87);
OP (FG, B, C, D, A, 8, 20, 0x455a14ed);
OP (FG, A, B, C, D, 13, 5, 0xa9e3e905);
OP (FG, D, A, B, C, 2, 9, 0xfcefa3f8);
OP (FG, C, D, A, B, 7, 14, 0x676f02d9);
OP (FG, B, C, D, A, 12, 20, 0x8d2a4c8a);
/* Round 3. */
OP (FH, A, B, C, D, 5, 4, 0xfffa3942);
OP (FH, D, A, B, C, 8, 11, 0x8771f681);
OP (FH, C, D, A, B, 11, 16, 0x6d9d6122);
OP (FH, B, C, D, A, 14, 23, 0xfde5380c);
OP (FH, A, B, C, D, 1, 4, 0xa4beea44);
OP (FH, D, A, B, C, 4, 11, 0x4bdecfa9);
OP (FH, C, D, A, B, 7, 16, 0xf6bb4b60);
OP (FH, B, C, D, A, 10, 23, 0xbebfbc70);
OP (FH, A, B, C, D, 13, 4, 0x289b7ec6);
OP (FH, D, A, B, C, 0, 11, 0xeaa127fa);
OP (FH, C, D, A, B, 3, 16, 0xd4ef3085);
OP (FH, B, C, D, A, 6, 23, 0x04881d05);
OP (FH, A, B, C, D, 9, 4, 0xd9d4d039);
OP (FH, D, A, B, C, 12, 11, 0xe6db99e5);
OP (FH, C, D, A, B, 15, 16, 0x1fa27cf8);
OP (FH, B, C, D, A, 2, 23, 0xc4ac5665);
/* Round 4. */
OP (FI, A, B, C, D, 0, 6, 0xf4292244);
OP (FI, D, A, B, C, 7, 10, 0x432aff97);
OP (FI, C, D, A, B, 14, 15, 0xab9423a7);
OP (FI, B, C, D, A, 5, 21, 0xfc93a039);
OP (FI, A, B, C, D, 12, 6, 0x655b59c3);
OP (FI, D, A, B, C, 3, 10, 0x8f0ccc92);
OP (FI, C, D, A, B, 10, 15, 0xffeff47d);
OP (FI, B, C, D, A, 1, 21, 0x85845dd1);
OP (FI, A, B, C, D, 8, 6, 0x6fa87e4f);
OP (FI, D, A, B, C, 15, 10, 0xfe2ce6e0);
OP (FI, C, D, A, B, 6, 15, 0xa3014314);
OP (FI, B, C, D, A, 13, 21, 0x4e0811a1);
OP (FI, A, B, C, D, 4, 6, 0xf7537e82);
OP (FI, D, A, B, C, 11, 10, 0xbd3af235);
OP (FI, C, D, A, B, 2, 15, 0x2ad7d2bb);
OP (FI, B, C, D, A, 9, 21, 0xeb86d391);
/* Add the starting values of the context. */
A += A_save;
B += B_save;
C += C_save;
D += D_save;
}
/* Put checksum in context given as argument. */
ctx->A = A;
ctx->B = B;
ctx->C = C;
ctx->D = D;
}

96
crypt/sha256-block.c Normal file
View File

@ -0,0 +1,96 @@
/* Process LEN bytes of BUFFER, accumulating context into CTX.
It is assumed that LEN % 64 == 0. */
void
sha256_process_block (const void *buffer, size_t len, struct sha256_ctx *ctx)
{
const uint32_t *words = buffer;
size_t nwords = len / sizeof (uint32_t);
uint32_t a = ctx->H[0];
uint32_t b = ctx->H[1];
uint32_t c = ctx->H[2];
uint32_t d = ctx->H[3];
uint32_t e = ctx->H[4];
uint32_t f = ctx->H[5];
uint32_t g = ctx->H[6];
uint32_t h = ctx->H[7];
/* First increment the byte count. FIPS 180-2 specifies the possible
length of the file up to 2^64 bits. Here we only compute the
number of bytes. */
ctx->total64 += len;
/* Process all bytes in the buffer with 64 bytes in each round of
the loop. */
while (nwords > 0)
{
uint32_t W[64];
uint32_t a_save = a;
uint32_t b_save = b;
uint32_t c_save = c;
uint32_t d_save = d;
uint32_t e_save = e;
uint32_t f_save = f;
uint32_t g_save = g;
uint32_t h_save = h;
/* Operators defined in FIPS 180-2:4.1.2. */
#define Ch(x, y, z) ((x & y) ^ (~x & z))
#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
#define S0(x) (CYCLIC (x, 2) ^ CYCLIC (x, 13) ^ CYCLIC (x, 22))
#define S1(x) (CYCLIC (x, 6) ^ CYCLIC (x, 11) ^ CYCLIC (x, 25))
#define R0(x) (CYCLIC (x, 7) ^ CYCLIC (x, 18) ^ (x >> 3))
#define R1(x) (CYCLIC (x, 17) ^ CYCLIC (x, 19) ^ (x >> 10))
/* It is unfortunate that C does not provide an operator for
cyclic rotation. Hope the C compiler is smart enough. */
#define CYCLIC(w, s) ((w >> s) | (w << (32 - s)))
/* Compute the message schedule according to FIPS 180-2:6.2.2 step 2. */
for (unsigned int t = 0; t < 16; ++t)
{
W[t] = SWAP (*words);
++words;
}
for (unsigned int t = 16; t < 64; ++t)
W[t] = R1 (W[t - 2]) + W[t - 7] + R0 (W[t - 15]) + W[t - 16];
/* The actual computation according to FIPS 180-2:6.2.2 step 3. */
for (unsigned int t = 0; t < 64; ++t)
{
uint32_t T1 = h + S1 (e) + Ch (e, f, g) + K[t] + W[t];
uint32_t T2 = S0 (a) + Maj (a, b, c);
h = g;
g = f;
f = e;
e = d + T1;
d = c;
c = b;
b = a;
a = T1 + T2;
}
/* Add the starting values of the context according to FIPS 180-2:6.2.2
step 4. */
a += a_save;
b += b_save;
c += c_save;
d += d_save;
e += e_save;
f += f_save;
g += g_save;
h += h_save;
/* Prepare for the next round. */
nwords -= 16;
}
/* Put checksum in context given as argument. */
ctx->H[0] = a;
ctx->H[1] = b;
ctx->H[2] = c;
ctx->H[3] = d;
ctx->H[4] = e;
ctx->H[5] = f;
ctx->H[6] = g;
ctx->H[7] = h;
}

View File

@ -80,104 +80,8 @@ static const uint32_t K[64] =
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
}; };
void
/* Process LEN bytes of BUFFER, accumulating context into CTX. sha256_process_block (const void *, size_t, struct sha256_ctx *);
It is assumed that LEN % 64 == 0. */
static void
sha256_process_block (const void *buffer, size_t len, struct sha256_ctx *ctx)
{
const uint32_t *words = buffer;
size_t nwords = len / sizeof (uint32_t);
uint32_t a = ctx->H[0];
uint32_t b = ctx->H[1];
uint32_t c = ctx->H[2];
uint32_t d = ctx->H[3];
uint32_t e = ctx->H[4];
uint32_t f = ctx->H[5];
uint32_t g = ctx->H[6];
uint32_t h = ctx->H[7];
/* First increment the byte count. FIPS 180-2 specifies the possible
length of the file up to 2^64 bits. Here we only compute the
number of bytes. */
ctx->total64 += len;
/* Process all bytes in the buffer with 64 bytes in each round of
the loop. */
while (nwords > 0)
{
uint32_t W[64];
uint32_t a_save = a;
uint32_t b_save = b;
uint32_t c_save = c;
uint32_t d_save = d;
uint32_t e_save = e;
uint32_t f_save = f;
uint32_t g_save = g;
uint32_t h_save = h;
/* Operators defined in FIPS 180-2:4.1.2. */
#define Ch(x, y, z) ((x & y) ^ (~x & z))
#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
#define S0(x) (CYCLIC (x, 2) ^ CYCLIC (x, 13) ^ CYCLIC (x, 22))
#define S1(x) (CYCLIC (x, 6) ^ CYCLIC (x, 11) ^ CYCLIC (x, 25))
#define R0(x) (CYCLIC (x, 7) ^ CYCLIC (x, 18) ^ (x >> 3))
#define R1(x) (CYCLIC (x, 17) ^ CYCLIC (x, 19) ^ (x >> 10))
/* It is unfortunate that C does not provide an operator for
cyclic rotation. Hope the C compiler is smart enough. */
#define CYCLIC(w, s) ((w >> s) | (w << (32 - s)))
/* Compute the message schedule according to FIPS 180-2:6.2.2 step 2. */
for (unsigned int t = 0; t < 16; ++t)
{
W[t] = SWAP (*words);
++words;
}
for (unsigned int t = 16; t < 64; ++t)
W[t] = R1 (W[t - 2]) + W[t - 7] + R0 (W[t - 15]) + W[t - 16];
/* The actual computation according to FIPS 180-2:6.2.2 step 3. */
for (unsigned int t = 0; t < 64; ++t)
{
uint32_t T1 = h + S1 (e) + Ch (e, f, g) + K[t] + W[t];
uint32_t T2 = S0 (a) + Maj (a, b, c);
h = g;
g = f;
f = e;
e = d + T1;
d = c;
c = b;
b = a;
a = T1 + T2;
}
/* Add the starting values of the context according to FIPS 180-2:6.2.2
step 4. */
a += a_save;
b += b_save;
c += c_save;
d += d_save;
e += e_save;
f += f_save;
g += g_save;
h += h_save;
/* Prepare for the next round. */
nwords -= 16;
}
/* Put checksum in context given as argument. */
ctx->H[0] = a;
ctx->H[1] = b;
ctx->H[2] = c;
ctx->H[3] = d;
ctx->H[4] = e;
ctx->H[5] = f;
ctx->H[6] = g;
ctx->H[7] = h;
}
/* Initialize structure containing state of computation. /* Initialize structure containing state of computation.
(FIPS 180-2:5.3.2) */ (FIPS 180-2:5.3.2) */
@ -312,3 +216,5 @@ __sha256_process_bytes (buffer, len, ctx)
ctx->buflen = left_over; ctx->buflen = left_over;
} }
} }
#include <sha256-block.c>

103
crypt/sha512-block.c Normal file
View File

@ -0,0 +1,103 @@
/* Process LEN bytes of BUFFER, accumulating context into CTX.
It is assumed that LEN % 128 == 0. */
void
sha512_process_block (const void *buffer, size_t len, struct sha512_ctx *ctx)
{
const uint64_t *words = buffer;
size_t nwords = len / sizeof (uint64_t);
uint64_t a = ctx->H[0];
uint64_t b = ctx->H[1];
uint64_t c = ctx->H[2];
uint64_t d = ctx->H[3];
uint64_t e = ctx->H[4];
uint64_t f = ctx->H[5];
uint64_t g = ctx->H[6];
uint64_t h = ctx->H[7];
/* First increment the byte count. FIPS 180-2 specifies the possible
length of the file up to 2^128 bits. Here we only compute the
number of bytes. Do a double word increment. */
#ifdef USE_TOTAL128
ctx->total128 += len;
#else
uint64_t lolen = len;
ctx->total[TOTAL128_low] += lolen;
ctx->total[TOTAL128_high] += ((len >> 31 >> 31 >> 2)
+ (ctx->total[TOTAL128_low] < lolen));
#endif
/* Process all bytes in the buffer with 128 bytes in each round of
the loop. */
while (nwords > 0)
{
uint64_t W[80];
uint64_t a_save = a;
uint64_t b_save = b;
uint64_t c_save = c;
uint64_t d_save = d;
uint64_t e_save = e;
uint64_t f_save = f;
uint64_t g_save = g;
uint64_t h_save = h;
/* Operators defined in FIPS 180-2:4.1.2. */
#define Ch(x, y, z) ((x & y) ^ (~x & z))
#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
#define S0(x) (CYCLIC (x, 28) ^ CYCLIC (x, 34) ^ CYCLIC (x, 39))
#define S1(x) (CYCLIC (x, 14) ^ CYCLIC (x, 18) ^ CYCLIC (x, 41))
#define R0(x) (CYCLIC (x, 1) ^ CYCLIC (x, 8) ^ (x >> 7))
#define R1(x) (CYCLIC (x, 19) ^ CYCLIC (x, 61) ^ (x >> 6))
/* It is unfortunate that C does not provide an operator for
cyclic rotation. Hope the C compiler is smart enough. */
#define CYCLIC(w, s) ((w >> s) | (w << (64 - s)))
/* Compute the message schedule according to FIPS 180-2:6.3.2 step 2. */
for (unsigned int t = 0; t < 16; ++t)
{
W[t] = SWAP (*words);
++words;
}
for (unsigned int t = 16; t < 80; ++t)
W[t] = R1 (W[t - 2]) + W[t - 7] + R0 (W[t - 15]) + W[t - 16];
/* The actual computation according to FIPS 180-2:6.3.2 step 3. */
for (unsigned int t = 0; t < 80; ++t)
{
uint64_t T1 = h + S1 (e) + Ch (e, f, g) + K[t] + W[t];
uint64_t T2 = S0 (a) + Maj (a, b, c);
h = g;
g = f;
f = e;
e = d + T1;
d = c;
c = b;
b = a;
a = T1 + T2;
}
/* Add the starting values of the context according to FIPS 180-2:6.3.2
step 4. */
a += a_save;
b += b_save;
c += c_save;
d += d_save;
e += e_save;
f += f_save;
g += g_save;
h += h_save;
/* Prepare for the next round. */
nwords -= 16;
}
/* Put checksum in context given as argument. */
ctx->H[0] = a;
ctx->H[1] = b;
ctx->H[2] = c;
ctx->H[3] = d;
ctx->H[4] = e;
ctx->H[5] = f;
ctx->H[6] = g;
ctx->H[7] = h;
}

View File

@ -100,111 +100,8 @@ static const uint64_t K[80] =
UINT64_C (0x5fcb6fab3ad6faec), UINT64_C (0x6c44198c4a475817) UINT64_C (0x5fcb6fab3ad6faec), UINT64_C (0x6c44198c4a475817)
}; };
void
/* Process LEN bytes of BUFFER, accumulating context into CTX. sha512_process_block (const void *buffer, size_t len, struct sha512_ctx *ctx);
It is assumed that LEN % 128 == 0. */
static void
sha512_process_block (const void *buffer, size_t len, struct sha512_ctx *ctx)
{
const uint64_t *words = buffer;
size_t nwords = len / sizeof (uint64_t);
uint64_t a = ctx->H[0];
uint64_t b = ctx->H[1];
uint64_t c = ctx->H[2];
uint64_t d = ctx->H[3];
uint64_t e = ctx->H[4];
uint64_t f = ctx->H[5];
uint64_t g = ctx->H[6];
uint64_t h = ctx->H[7];
/* First increment the byte count. FIPS 180-2 specifies the possible
length of the file up to 2^128 bits. Here we only compute the
number of bytes. Do a double word increment. */
#ifdef USE_TOTAL128
ctx->total128 += len;
#else
uint64_t lolen = len;
ctx->total[TOTAL128_low] += lolen;
ctx->total[TOTAL128_high] += ((len >> 31 >> 31 >> 2)
+ (ctx->total[TOTAL128_low] < lolen));
#endif
/* Process all bytes in the buffer with 128 bytes in each round of
the loop. */
while (nwords > 0)
{
uint64_t W[80];
uint64_t a_save = a;
uint64_t b_save = b;
uint64_t c_save = c;
uint64_t d_save = d;
uint64_t e_save = e;
uint64_t f_save = f;
uint64_t g_save = g;
uint64_t h_save = h;
/* Operators defined in FIPS 180-2:4.1.2. */
#define Ch(x, y, z) ((x & y) ^ (~x & z))
#define Maj(x, y, z) ((x & y) ^ (x & z) ^ (y & z))
#define S0(x) (CYCLIC (x, 28) ^ CYCLIC (x, 34) ^ CYCLIC (x, 39))
#define S1(x) (CYCLIC (x, 14) ^ CYCLIC (x, 18) ^ CYCLIC (x, 41))
#define R0(x) (CYCLIC (x, 1) ^ CYCLIC (x, 8) ^ (x >> 7))
#define R1(x) (CYCLIC (x, 19) ^ CYCLIC (x, 61) ^ (x >> 6))
/* It is unfortunate that C does not provide an operator for
cyclic rotation. Hope the C compiler is smart enough. */
#define CYCLIC(w, s) ((w >> s) | (w << (64 - s)))
/* Compute the message schedule according to FIPS 180-2:6.3.2 step 2. */
for (unsigned int t = 0; t < 16; ++t)
{
W[t] = SWAP (*words);
++words;
}
for (unsigned int t = 16; t < 80; ++t)
W[t] = R1 (W[t - 2]) + W[t - 7] + R0 (W[t - 15]) + W[t - 16];
/* The actual computation according to FIPS 180-2:6.3.2 step 3. */
for (unsigned int t = 0; t < 80; ++t)
{
uint64_t T1 = h + S1 (e) + Ch (e, f, g) + K[t] + W[t];
uint64_t T2 = S0 (a) + Maj (a, b, c);
h = g;
g = f;
f = e;
e = d + T1;
d = c;
c = b;
b = a;
a = T1 + T2;
}
/* Add the starting values of the context according to FIPS 180-2:6.3.2
step 4. */
a += a_save;
b += b_save;
c += c_save;
d += d_save;
e += e_save;
f += f_save;
g += g_save;
h += h_save;
/* Prepare for the next round. */
nwords -= 16;
}
/* Put checksum in context given as argument. */
ctx->H[0] = a;
ctx->H[1] = b;
ctx->H[2] = c;
ctx->H[3] = d;
ctx->H[4] = e;
ctx->H[5] = f;
ctx->H[6] = g;
ctx->H[7] = h;
}
/* Initialize structure containing state of computation. /* Initialize structure containing state of computation.
(FIPS 180-2:5.3.3) */ (FIPS 180-2:5.3.3) */
@ -342,3 +239,5 @@ __sha512_process_bytes (buffer, len, ctx)
ctx->buflen = left_over; ctx->buflen = left_over;
} }
} }
#include <sha512-block.c>

View File

@ -59,6 +59,8 @@ GPERFFLAGS = -acCgopt -k1,2,5,9,$$ -L ANSI-C
include ../Rules include ../Rules
CFLAGS-md5.c = -I../crypt
programs/%-kw.h: programs/%-kw.gperf programs/%-kw.h: programs/%-kw.gperf
cd programs \ cd programs \
&& $(GPERF) $(GPERFFLAGS) -N $(@F:-kw.h=_hash) $(<F) > $(@F).new && $(GPERF) $(GPERFFLAGS) -N $(@F:-kw.h=_hash) $(<F) > $(@F).new

View File

@ -109,4 +109,6 @@ END (__##name)
} \ } \
__asm__ (".type " #name ", %gnu_indirect_function"); __asm__ (".type " #name ", %gnu_indirect_function");
# define sparc_libc_ifunc(name, expr) sparc_libm_ifunc (name, expr)
#endif /* __ASSEMBLER__ */ #endif /* __ASSEMBLER__ */

View File

@ -1,3 +1,11 @@
ifeq ($(subdir),crypt)
libcrypt-sysdep_routines += md5-crop sha256-crop sha512-crop
endif
ifeq ($(subdir),locale)
localedef-aux += md5-crop
endif
ifeq ($(subdir),string) ifeq ($(subdir),string)
sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \ sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \
memset-niagara1 memcpy-niagara4 memset-niagara4 memset-niagara1 memcpy-niagara4 memset-niagara4

View File

@ -0,0 +1 @@
#include <sparc64/multiarch/md5-block.c>

View File

@ -0,0 +1 @@
#include <sparc64/multiarch/md5-crop.S>

View File

@ -0,0 +1 @@
#include <sparc64/multiarch/sha256-block.c>

View File

@ -0,0 +1 @@
#include <sparc64/multiarch/sha256-crop.S>

View File

@ -0,0 +1 @@
#include <sparc64/multiarch/sha512-block.c>

View File

@ -0,0 +1 @@
#include <sparc64/multiarch/sha512-crop.S>

View File

@ -1,3 +1,11 @@
ifeq ($(subdir),crypt)
libcrypt-sysdep_routines += md5-crop sha256-crop sha512-crop
endif
ifeq ($(subdir),locale)
localedef-aux += md5-crop
endif
ifeq ($(subdir),string) ifeq ($(subdir),string)
sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \ sysdep_routines += memcpy-ultra3 memcpy-niagara1 memcpy-niagara2 \
memset-niagara1 memcpy-niagara4 memset-niagara4 memset-niagara1 memcpy-niagara4 memset-niagara4

View File

@ -0,0 +1,29 @@
#include <sparc-ifunc.h>
#define __md5_process_block __md5_process_block_generic
extern void __md5_process_block_generic (const void *buffer, size_t len,
struct md5_ctx *ctx);
#include <crypt/md5-block.c>
#undef __md5_process_block
extern void __md5_process_block_crop (const void *buffer, size_t len,
struct md5_ctx *ctx);
static bool cpu_supports_md5(int hwcap)
{
unsigned long cfr;
if (!(hwcap & HWCAP_SPARC_CRYPTO))
return false;
__asm__ ("rd %%asr26, %0" : "=r" (cfr));
if (cfr & (1 << 4))
return true;
return false;
}
extern void __md5_process_block (const void *buffer, size_t len,
struct md5_ctx *ctx);
sparc_libc_ifunc(__md5_process_block, cpu_supports_md5(hwcap) ? __md5_process_block_crop : __md5_process_block_generic);

View File

@ -0,0 +1,110 @@
/* MD5 using sparc crypto opcodes.
Copyright (C) 2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by David S. Miller (davem@davemloft.net)
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
#define ASI_PL 0x88
#define MD5 \
.word 0x81b02800;
.text
.align 32
ENTRY(__md5_process_block_crop)
/* %o0=buffer, %o1=len, %o2=CTX */
ld [%o2 + 0x10], %g1
add %g1, %o1, %o4
st %o4, [%o2 + 0x10]
clr %o5
cmp %o4, %g1
movlu %icc, 1, %o5
#ifdef __arch64__
srlx %o1, 32, %o4
add %o5, %o4, %o5
#endif
ld [%o2 + 0x14], %o4
add %o4, %o5, %o4
st %o4, [%o2 + 0x14]
lda [%o2] ASI_PL, %f0
add %o2, 0x4, %g1
lda [%g1] ASI_PL, %f1
add %o2, 0x8, %g1
andcc %o0, 0x7, %g0
lda [%g1] ASI_PL, %f2
add %o2, 0xc, %g1
bne,pn %xcc, 10f
lda [%g1] ASI_PL, %f3
1:
ldd [%o0 + 0x00], %f8
ldd [%o0 + 0x08], %f10
ldd [%o0 + 0x10], %f12
ldd [%o0 + 0x18], %f14
ldd [%o0 + 0x20], %f16
ldd [%o0 + 0x28], %f18
ldd [%o0 + 0x30], %f20
ldd [%o0 + 0x38], %f22
MD5
subcc %o1, 64, %o1
bne,pt %xcc, 1b
add %o0, 0x40, %o0
5:
sta %f0, [%o2] ASI_PL
add %o2, 0x4, %g1
sta %f1, [%g1] ASI_PL
add %o2, 0x8, %g1
sta %f2, [%g1] ASI_PL
add %o2, 0xc, %g1
retl
sta %f3, [%g1] ASI_PL
10:
alignaddr %o0, %g0, %o0
ldd [%o0 + 0x00], %f10
1:
ldd [%o0 + 0x08], %f12
ldd [%o0 + 0x10], %f14
ldd [%o0 + 0x18], %f16
ldd [%o0 + 0x20], %f18
ldd [%o0 + 0x28], %f20
ldd [%o0 + 0x30], %f22
ldd [%o0 + 0x38], %f24
ldd [%o0 + 0x40], %f26
faligndata %f10, %f12, %f8
faligndata %f12, %f14, %f10
faligndata %f14, %f16, %f12
faligndata %f16, %f18, %f14
faligndata %f18, %f20, %f16
faligndata %f20, %f22, %f18
faligndata %f22, %f24, %f20
faligndata %f24, %f26, %f22
MD5
subcc %o1, 64, %o1
fsrc2 %f26, %f10
bne,pt %xcc, 1b
add %o0, 0x40, %o0
ba,a,pt %xcc, 5b
END(__md5_process_block_crop)

View File

@ -0,0 +1,30 @@
#include <sparc-ifunc.h>
#define sha256_process_block sha256_process_block_generic
extern void sha256_process_block_generic (const void *buffer, size_t len,
struct sha256_ctx *ctx);
#include <crypt/sha256-block.c>
#undef sha256_process_block
extern void __sha256_process_block_crop (const void *buffer, size_t len,
struct sha256_ctx *ctx);
static bool cpu_supports_sha256(int hwcap)
{
unsigned long cfr;
if (!(hwcap & HWCAP_SPARC_CRYPTO))
return false;
__asm__ ("rd %%asr26, %0" : "=r" (cfr));
if (cfr & (1 << 6))
return true;
return false;
}
extern void sha256_process_block (const void *buffer, size_t len,
struct sha256_ctx *ctx);
sparc_libc_ifunc(sha256_process_block, cpu_supports_sha256(hwcap) ? __sha256_process_block_crop : sha256_process_block_generic);

View File

@ -0,0 +1,101 @@
/* SHA256 using sparc crypto opcodes.
Copyright (C) 2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by David S. Miller (davem@davemloft.net)
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
#define SHA256 \
.word 0x81b02840;
.text
.align 32
ENTRY(__sha256_process_block_crop)
/* %o0=buffer, %o1=len, %o2=CTX */
ldx [%o2 + 0x20], %g1
add %g1, %o1, %g1
stx %g1, [%o2 + 0x20]
ld [%o2 + 0x00], %f0
ld [%o2 + 0x04], %f1
ld [%o2 + 0x08], %f2
ld [%o2 + 0x0c], %f3
ld [%o2 + 0x10], %f4
ld [%o2 + 0x14], %f5
andcc %o1, 0x7, %g0
ld [%o2 + 0x18], %f6
bne,pn %xcc, 10f
ld [%o2 + 0x1c], %f7
1:
ldd [%o0 + 0x00], %f8
ldd [%o0 + 0x08], %f10
ldd [%o0 + 0x10], %f12
ldd [%o0 + 0x18], %f14
ldd [%o0 + 0x20], %f16
ldd [%o0 + 0x28], %f18
ldd [%o0 + 0x30], %f20
ldd [%o0 + 0x38], %f22
SHA256
subcc %o1, 0x40, %o1
bne,pt %xcc, 1b
add %o0, 0x40, %o0
5:
st %f0, [%o2 + 0x00]
st %f1, [%o2 + 0x04]
st %f2, [%o2 + 0x08]
st %f3, [%o2 + 0x0c]
st %f4, [%o2 + 0x10]
st %f5, [%o2 + 0x14]
st %f6, [%o2 + 0x18]
retl
st %f7, [%o2 + 0x1c]
10:
alignaddr %o0, %g0, %o0
ldd [%o0 + 0x00], %f10
1:
ldd [%o0 + 0x08], %f12
ldd [%o0 + 0x10], %f14
ldd [%o0 + 0x18], %f16
ldd [%o0 + 0x20], %f18
ldd [%o0 + 0x28], %f20
ldd [%o0 + 0x30], %f22
ldd [%o0 + 0x38], %f24
ldd [%o0 + 0x40], %f26
faligndata %f10, %f12, %f8
faligndata %f12, %f14, %f10
faligndata %f14, %f16, %f12
faligndata %f16, %f18, %f14
faligndata %f18, %f20, %f16
faligndata %f20, %f22, %f18
faligndata %f22, %f24, %f20
faligndata %f24, %f26, %f22
SHA256
subcc %o1, 0x40, %o1
fsrc2 %f26, %f10
bne,pt %xcc, 1b
add %o0, 0x40, %o0
ba,a,pt %xcc, 5b
END(__sha256_process_block_crop)

View File

@ -0,0 +1,30 @@
#include <sparc-ifunc.h>
#define sha512_process_block sha512_process_block_generic
extern void sha512_process_block_generic (const void *buffer, size_t len,
struct sha512_ctx *ctx);
#include <crypt/sha512-block.c>
#undef sha512_process_block
extern void __sha512_process_block_crop (const void *buffer, size_t len,
struct sha512_ctx *ctx);
static bool cpu_supports_sha512(int hwcap)
{
unsigned long cfr;
if (!(hwcap & HWCAP_SPARC_CRYPTO))
return false;
__asm__ ("rd %%asr26, %0" : "=r" (cfr));
if (cfr & (1 << 6))
return true;
return false;
}
extern void sha512_process_block (const void *buffer, size_t len,
struct sha512_ctx *ctx);
sparc_libc_ifunc(sha512_process_block, cpu_supports_sha512(hwcap) ? __sha512_process_block_crop : sha512_process_block_generic);

View File

@ -0,0 +1,131 @@
/* SHA512 using sparc crypto opcodes.
Copyright (C) 2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by David S. Miller (davem@davemloft.net)
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <sysdep.h>
#define SHA512 \
.word 0x81b02860;
.text
.align 32
ENTRY(__sha512_process_block_crop)
/* %o0=buffer, %o1=len, %o2=CTX */
ldx [%o2 + 0x48], %g1
add %g1, %o1, %o4
stx %o4, [%o2 + 0x48]
cmp %o4, %g1
bgeu,pt %xcc, 1f
nop
ldx [%o2 + 0x40], %g1
add %g1, 1, %g1
stx %g1, [%o2 + 0x40]
1: ldd [%o2 + 0x00], %f0
ldd [%o2 + 0x08], %f2
ldd [%o2 + 0x10], %f4
ldd [%o2 + 0x18], %f6
ldd [%o2 + 0x20], %f8
ldd [%o2 + 0x28], %f10
andcc %o1, 0x7, %g0
ldd [%o2 + 0x30], %f12
bne,pn %xcc, 10f
ldd [%o2 + 0x38], %f14
1:
ldd [%o0 + 0x00], %f16
ldd [%o0 + 0x08], %f18
ldd [%o0 + 0x10], %f20
ldd [%o0 + 0x18], %f22
ldd [%o0 + 0x20], %f24
ldd [%o0 + 0x28], %f26
ldd [%o0 + 0x30], %f28
ldd [%o0 + 0x38], %f30
ldd [%o0 + 0x40], %f32
ldd [%o0 + 0x48], %f34
ldd [%o0 + 0x50], %f36
ldd [%o0 + 0x58], %f38
ldd [%o0 + 0x60], %f40
ldd [%o0 + 0x68], %f42
ldd [%o0 + 0x70], %f44
ldd [%o0 + 0x78], %f46
SHA512
subcc %o1, 0x80, %o1
bne,pt %xcc, 1b
add %o0, 0x80, %o0
5:
std %f0, [%o2 + 0x00]
std %f2, [%o2 + 0x08]
std %f4, [%o2 + 0x10]
std %f6, [%o2 + 0x18]
std %f8, [%o2 + 0x20]
std %f10, [%o2 + 0x28]
std %f12, [%o2 + 0x30]
retl
std %f14, [%o2 + 0x38]
10:
alignaddr %o0, %g0, %o0
ldd [%o0 + 0x00], %f18
1:
ldd [%o0 + 0x08], %f20
ldd [%o0 + 0x10], %f22
ldd [%o0 + 0x18], %f24
ldd [%o0 + 0x20], %f26
ldd [%o0 + 0x28], %f28
ldd [%o0 + 0x30], %f30
ldd [%o0 + 0x38], %f32
ldd [%o0 + 0x40], %f34
ldd [%o0 + 0x48], %f36
ldd [%o0 + 0x50], %f38
ldd [%o0 + 0x58], %f40
ldd [%o0 + 0x60], %f42
ldd [%o0 + 0x68], %f44
ldd [%o0 + 0x70], %f46
ldd [%o0 + 0x78], %f48
ldd [%o0 + 0x80], %f50
faligndata %f18, %f20, %f16
faligndata %f20, %f22, %f18
faligndata %f22, %f24, %f20
faligndata %f24, %f26, %f22
faligndata %f26, %f28, %f24
faligndata %f28, %f30, %f26
faligndata %f30, %f32, %f28
faligndata %f32, %f34, %f30
faligndata %f34, %f36, %f32
faligndata %f36, %f38, %f34
faligndata %f38, %f40, %f36
faligndata %f40, %f42, %f38
faligndata %f42, %f44, %f40
faligndata %f44, %f46, %f42
faligndata %f46, %f48, %f44
faligndata %f48, %f50, %f46
SHA512
subcc %o1, 0x80, %o1
fsrc2 %f50, %f18
bne,pt %xcc, 1b
add %o0, 0x80, %o0
ba,a,pt %xcc, 5b
END(__sha512_process_block_crop)