mirror of
https://github.com/php/php-src.git
synced 2024-11-23 01:44:06 +08:00
hash: Add SHA-NI implementation of SHA-256 (#15152)
* hash: Add SSE2 implementation of SHA-256 Implementation taken from tarsnap/libcperciva@661752aee8. Co-authored-by: Christoph M. Becker <cmbecker69@gmx.de> Co-authored-by: Niels Dossche <7771979+nielsdos@users.noreply.github.com> * zend_cpuinfo: Add ZEND_CPU_FEATURE_SHA * hash: Add SHA-NI implementation of SHA-256 Implementation taken from tarsnap/libcperciva@661752aee8. Co-authored-by: Christoph M. Becker <cmbecker69@gmx.de> * NEWS / UPGRADING --------- Co-authored-by: Christoph M. Becker <cmbecker69@gmx.de> Co-authored-by: Niels Dossche <7771979+nielsdos@users.noreply.github.com>
This commit is contained in:
parent
a355c3572e
commit
6eca7839af
2
NEWS
2
NEWS
@ -26,6 +26,8 @@ PHP NEWS
|
||||
- Hash:
|
||||
. Deprecated passing incorrect data types for options to ext/hash functions.
|
||||
(nielsdos)
|
||||
. Added SSE2 and SHA-NI implementation of SHA-256. (timwolla, Colin Percival,
|
||||
Graham Percival)
|
||||
|
||||
- PHPDBG:
|
||||
. array out of bounds, stack overflow handled for segfault handler on windows.
|
||||
|
@ -18,7 +18,7 @@
|
||||
18. avifinfo (ext/standard/libavifinfo) see ext/standard/libavifinfo/LICENSE
|
||||
19. xxHash (ext/hash/xxhash)
|
||||
20. Lexbor (ext/dom/lexbor/lexbor) see ext/dom/lexbor/LICENSE
|
||||
|
||||
21. Portions of libcperciva (ext/hash/hash_sha_{ni,sse2}.c) see the header in the source file
|
||||
|
||||
3. pcre2lib (ext/pcre)
|
||||
|
||||
|
@ -955,6 +955,10 @@ PHP 8.4 UPGRADE NOTES
|
||||
. Improved the performance of FTP uploads up to a factor of 10x for large
|
||||
uploads.
|
||||
|
||||
- Hash:
|
||||
. Added SSE2 and SHA-NI implementations of SHA-256. This improves the performance
|
||||
on supported CPUs by ~1.3x (SSE2) and 3x - 5x (SHA-NI).
|
||||
|
||||
- MBString:
|
||||
. The performance of strspn() and strcspn() is greatly improved.
|
||||
They now run in linear time instead of being bounded by quadratic time.
|
||||
|
@ -64,6 +64,7 @@ typedef enum _zend_cpu_feature {
|
||||
ZEND_CPU_FEATURE_AVX512F = (1<<16 | ZEND_CPU_EBX_MASK),
|
||||
ZEND_CPU_FEATURE_AVX512DQ = (1<<17 | ZEND_CPU_EBX_MASK),
|
||||
ZEND_CPU_FEATURE_AVX512CD = (1<<28 | ZEND_CPU_EBX_MASK),
|
||||
ZEND_CPU_FEATURE_SHA = (1<<29 | ZEND_CPU_EBX_MASK),
|
||||
/* intentionally don't support = (1<<30 | ZEND_CPU_EBX_MASK) */
|
||||
/* intentionally don't support = (1<<31 | ZEND_CPU_EBX_MASK) */
|
||||
|
||||
|
@ -34,7 +34,7 @@ else
|
||||
PHP_HASH_CFLAGS="$PHP_HASH_CFLAGS -I@ext_srcdir@/$SHA3_DIR -DKeccakP200_excluded -DKeccakP400_excluded -DKeccakP800_excluded -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1"
|
||||
fi
|
||||
|
||||
EXT_HASH_SOURCES="hash.c hash_md.c hash_sha.c hash_ripemd.c hash_haval.c \
|
||||
EXT_HASH_SOURCES="hash.c hash_md.c hash_sha.c hash_sha_sse2.c hash_sha_ni.c hash_ripemd.c hash_haval.c \
|
||||
hash_tiger.c hash_gost.c hash_snefru.c hash_whirlpool.c hash_adler32.c \
|
||||
hash_crc32.c hash_fnv.c hash_joaat.c $EXT_HASH_SHA3_SOURCES
|
||||
murmur/PMurHash.c murmur/PMurHash128.c hash_murmur.c hash_xxhash.c"
|
||||
|
@ -9,7 +9,7 @@ if (PHP_MHASH != 'no') {
|
||||
|
||||
PHP_HASH = 'yes';
|
||||
|
||||
EXTENSION('hash', 'hash.c hash_md.c hash_sha.c hash_ripemd.c hash_haval.c ' +
|
||||
EXTENSION('hash', 'hash.c hash_md.c hash_sha.c hash_sha_sse2.c hash_sha_ni.c hash_ripemd.c hash_haval.c ' +
|
||||
'hash_tiger.c hash_gost.c hash_snefru.c hash_whirlpool.c ' +
|
||||
'hash_adler32.c hash_crc32.c hash_joaat.c hash_fnv.c ' +
|
||||
'hash_sha3.c hash_murmur.c hash_xxhash.c', false);
|
||||
|
@ -17,6 +17,7 @@
|
||||
|
||||
#include "php_hash.h"
|
||||
#include "php_hash_sha.h"
|
||||
#include "Zend/zend_cpuinfo.h"
|
||||
|
||||
static const unsigned char PADDING[128] =
|
||||
{
|
||||
@ -160,6 +161,24 @@ PHP_HASH_API void PHP_SHA256InitArgs(PHP_SHA256_CTX * context, ZEND_ATTRIBUTE_UN
|
||||
*/
|
||||
static void SHA256Transform(uint32_t state[8], const unsigned char block[64])
|
||||
{
|
||||
#if defined(PHP_HASH_INTRIN_SHA_NATIVE)
|
||||
SHA256_Transform_shani(state, block);
|
||||
return;
|
||||
#elif defined(PHP_HASH_INTRIN_SHA_RESOLVER)
|
||||
if (zend_cpu_supports(ZEND_CPU_FEATURE_SSSE3) && zend_cpu_supports(ZEND_CPU_FEATURE_SHA)) {
|
||||
SHA256_Transform_shani(state, block);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__SSE2__)
|
||||
uint32_t tmp32[72];
|
||||
|
||||
SHA256_Transform_sse2(state, block, &tmp32[0], &tmp32[64]);
|
||||
ZEND_SECURE_ZERO((unsigned char*) tmp32, sizeof(tmp32));
|
||||
return;
|
||||
#endif
|
||||
|
||||
uint32_t a = state[0], b = state[1], c = state[2], d = state[3];
|
||||
uint32_t e = state[4], f = state[5], g = state[6], h = state[7];
|
||||
uint32_t x[16], T1, T2, W[64];
|
||||
|
176
ext/hash/hash_sha_ni.c
Normal file
176
ext/hash/hash_sha_ni.c
Normal file
@ -0,0 +1,176 @@
|
||||
/*-
|
||||
* Copyright 2018 Tarsnap Backup Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "php_hash.h"
|
||||
#include "php_hash_sha.h"
|
||||
|
||||
#if (defined(__i386__) || defined(__x86_64__)) && defined(HAVE_IMMINTRIN_H)
|
||||
|
||||
# include <immintrin.h>
|
||||
|
||||
# if PHP_HASH_INTRIN_SHA_RESOLVER
|
||||
static __m128i be32dec_128(const uint8_t * src) __attribute__((target("ssse3")));
|
||||
void SHA256_Transform_shani(uint32_t state[PHP_STATIC_RESTRICT 8], const uint8_t block[PHP_STATIC_RESTRICT 64]) __attribute__((target("ssse3,sha")));
|
||||
# endif
|
||||
|
||||
/* Original implementation from libcperciva follows.
|
||||
*
|
||||
* Modified to use `PHP_STATIC_RESTRICT` for MSVC compatibility.
|
||||
*/
|
||||
|
||||
/**
|
||||
* This code uses intrinsics from the following feature sets:
|
||||
* SHANI: _mm_sha256msg1_epu32, _mm_sha256msg2_epu32, _mm_sha256rnds2_epu32
|
||||
* SSSE3: _mm_shuffle_epi8, _mm_alignr_epi8
|
||||
* SSE2: Everything else
|
||||
*
|
||||
* The SSSE3 intrinsics could be avoided at a slight cost by using a few SSE2
|
||||
* instructions in their place; we have not done this since to our knowledge
|
||||
* there are presently no CPUs which support the SHANI instruction set but do
|
||||
* not support SSSE3.
|
||||
*/
|
||||
|
||||
/* Load 32-bit big-endian words. */
|
||||
static __m128i
|
||||
be32dec_128(const uint8_t * src)
|
||||
{
|
||||
const __m128i SHUF = _mm_set_epi8(12, 13, 14, 15, 8, 9, 10, 11,
|
||||
4, 5, 6, 7, 0, 1, 2, 3);
|
||||
__m128i x;
|
||||
|
||||
/* Load four 32-bit words. */
|
||||
x = _mm_loadu_si128((const __m128i *)src);
|
||||
|
||||
/* Reverse the order of the bytes in each word. */
|
||||
return (_mm_shuffle_epi8(x, SHUF));
|
||||
}
|
||||
|
||||
/* Convert an unsigned 32-bit immediate into a signed value. */
|
||||
#define I32(a) ((UINT32_C(a) >= UINT32_C(0x80000000)) ? \
|
||||
-(int32_t)(UINT32_C(0xffffffff) - UINT32_C(a)) - 1 : (int32_t)INT32_C(a))
|
||||
|
||||
/* Load four unsigned 32-bit immediates into a vector register. */
|
||||
#define IMM4(a, b, c, d) _mm_set_epi32(I32(a), I32(b), I32(c), I32(d))
|
||||
|
||||
/* Run four rounds of SHA256. */
|
||||
#define RND4(S, W, K0, K1, K2, K3) do { \
|
||||
__m128i M; \
|
||||
\
|
||||
/* Add the next four words of message schedule and round constants. */ \
|
||||
M = _mm_add_epi32(W, IMM4(K3, K2, K1, K0)); \
|
||||
\
|
||||
/* Perform two rounds of SHA256, using the low two words in M. */ \
|
||||
S[1] = _mm_sha256rnds2_epu32(S[1], S[0], M); \
|
||||
\
|
||||
/* Shift the two words of M down and perform the next two rounds. */ \
|
||||
M = _mm_srli_si128(M, 8); \
|
||||
S[0] = _mm_sha256rnds2_epu32(S[0], S[1], M); \
|
||||
} while (0)
|
||||
|
||||
/* Compute the ith set of four words of message schedule. */
|
||||
#define MSG4(W, i) do { \
|
||||
W[(i + 0) % 4] = _mm_sha256msg1_epu32(W[(i + 0) % 4], W[(i + 1) % 4]); \
|
||||
W[(i + 0) % 4] = _mm_add_epi32(W[(i + 0) % 4], \
|
||||
_mm_alignr_epi8(W[(i + 3) % 4], W[(i + 2) % 4], 4)); \
|
||||
W[(i + 0) % 4] = _mm_sha256msg2_epu32(W[(i + 0) % 4], W[(i + 3) % 4]); \
|
||||
} while (0)
|
||||
|
||||
/* Perform 4 rounds of SHA256 and generate more message schedule if needed. */
|
||||
#define RNDMSG(S, W, i, K0, K1, K2, K3) do { \
|
||||
RND4(S, W[i % 4], K0, K1, K2, K3); \
|
||||
if (i < 12) \
|
||||
MSG4(W, i + 4); \
|
||||
} while (0)
|
||||
|
||||
/**
|
||||
* SHA256_Transform_shani(state, block):
|
||||
* Compute the SHA256 block compression function, transforming ${state} using
|
||||
* the data in ${block}. This implementation uses x86 SHANI and SSSE3
|
||||
* instructions, and should only be used if CPUSUPPORT_X86_SHANI and _SSSE3
|
||||
* are defined and cpusupport_x86_shani() and _ssse3() return nonzero.
|
||||
*/
|
||||
void
|
||||
SHA256_Transform_shani(uint32_t state[PHP_STATIC_RESTRICT 8],
|
||||
const uint8_t block[PHP_STATIC_RESTRICT 64])
|
||||
{
|
||||
__m128i S3210, S7654;
|
||||
__m128i S0123, S4567;
|
||||
__m128i S0145, S2367;
|
||||
__m128i W[4];
|
||||
__m128i S[2];
|
||||
|
||||
/* Load state. */
|
||||
S3210 = _mm_loadu_si128((const __m128i *)&state[0]);
|
||||
S7654 = _mm_loadu_si128((const __m128i *)&state[4]);
|
||||
|
||||
/* Shuffle the 8 32-bit values into the order we need them. */
|
||||
S0123 = _mm_shuffle_epi32(S3210, 0x1B);
|
||||
S4567 = _mm_shuffle_epi32(S7654, 0x1B);
|
||||
S0145 = _mm_unpackhi_epi64(S4567, S0123);
|
||||
S2367 = _mm_unpacklo_epi64(S4567, S0123);
|
||||
|
||||
/* Load input block; this is the start of the message schedule. */
|
||||
W[0] = be32dec_128(&block[0]);
|
||||
W[1] = be32dec_128(&block[16]);
|
||||
W[2] = be32dec_128(&block[32]);
|
||||
W[3] = be32dec_128(&block[48]);
|
||||
|
||||
/* Initialize working variables. */
|
||||
S[0] = S0145;
|
||||
S[1] = S2367;
|
||||
|
||||
/* Perform 64 rounds, 4 at a time. */
|
||||
RNDMSG(S, W, 0, 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5);
|
||||
RNDMSG(S, W, 1, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5);
|
||||
RNDMSG(S, W, 2, 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3);
|
||||
RNDMSG(S, W, 3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174);
|
||||
RNDMSG(S, W, 4, 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc);
|
||||
RNDMSG(S, W, 5, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da);
|
||||
RNDMSG(S, W, 6, 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7);
|
||||
RNDMSG(S, W, 7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967);
|
||||
RNDMSG(S, W, 8, 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13);
|
||||
RNDMSG(S, W, 9, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85);
|
||||
RNDMSG(S, W, 10, 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3);
|
||||
RNDMSG(S, W, 11, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070);
|
||||
RNDMSG(S, W, 12, 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5);
|
||||
RNDMSG(S, W, 13, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3);
|
||||
RNDMSG(S, W, 14, 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208);
|
||||
RNDMSG(S, W, 15, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2);
|
||||
|
||||
/* Mix local working variables into global state. */
|
||||
S0145 = _mm_add_epi32(S0145, S[0]);
|
||||
S2367 = _mm_add_epi32(S2367, S[1]);
|
||||
|
||||
/* Shuffle state back to the original word order and store. */
|
||||
S0123 = _mm_unpackhi_epi64(S2367, S0145);
|
||||
S4567 = _mm_unpacklo_epi64(S2367, S0145);
|
||||
S3210 = _mm_shuffle_epi32(S0123, 0x1B);
|
||||
S7654 = _mm_shuffle_epi32(S4567, 0x1B);
|
||||
_mm_storeu_si128((__m128i *)&state[0], S3210);
|
||||
_mm_storeu_si128((__m128i *)&state[4], S7654);
|
||||
}
|
||||
|
||||
#endif
|
257
ext/hash/hash_sha_sse2.c
Normal file
257
ext/hash/hash_sha_sse2.c
Normal file
@ -0,0 +1,257 @@
|
||||
/*-
|
||||
* Copyright 2021 Tarsnap Backup Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "php_hash.h"
|
||||
#include "php_hash_sha.h"
|
||||
|
||||
#ifdef __SSE2__
|
||||
# include <emmintrin.h>
|
||||
|
||||
/* Original implementation from libcperciva follows.
|
||||
*
|
||||
* Modified to use `PHP_STATIC_RESTRICT` for MSVC compatibility.
|
||||
*/
|
||||
|
||||
/**
|
||||
* mm_bswap_epi32(a):
|
||||
* Byte-swap each 32-bit word.
|
||||
*/
|
||||
static inline __m128i
|
||||
mm_bswap_epi32(__m128i a)
|
||||
{
|
||||
|
||||
/* Swap bytes in each 16-bit word. */
|
||||
a = _mm_or_si128(_mm_slli_epi16(a, 8), _mm_srli_epi16(a, 8));
|
||||
|
||||
/* Swap all 16-bit words. */
|
||||
a = _mm_shufflelo_epi16(a, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
a = _mm_shufflehi_epi16(a, _MM_SHUFFLE(2, 3, 0, 1));
|
||||
|
||||
return (a);
|
||||
}
|
||||
|
||||
/* SHA256 round constants. */
|
||||
static const uint32_t Krnd[64] = {
|
||||
0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5,
|
||||
0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5,
|
||||
0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3,
|
||||
0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174,
|
||||
0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc,
|
||||
0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da,
|
||||
0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7,
|
||||
0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967,
|
||||
0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13,
|
||||
0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85,
|
||||
0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3,
|
||||
0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070,
|
||||
0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5,
|
||||
0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3,
|
||||
0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208,
|
||||
0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
|
||||
};
|
||||
|
||||
/* Elementary functions used by SHA256 */
|
||||
#define Ch(x, y, z) ((x & (y ^ z)) ^ z)
|
||||
#define Maj(x, y, z) ((x & (y | z)) | (y & z))
|
||||
#define ROTR(x, n) ((x >> n) | (x << (32 - n)))
|
||||
#define S0(x) (ROTR(x, 2) ^ ROTR(x, 13) ^ ROTR(x, 22))
|
||||
#define S1(x) (ROTR(x, 6) ^ ROTR(x, 11) ^ ROTR(x, 25))
|
||||
|
||||
/* SHA256 round function */
|
||||
#define RND(a, b, c, d, e, f, g, h, k) \
|
||||
h += S1(e) + Ch(e, f, g) + k; \
|
||||
d += h; \
|
||||
h += S0(a) + Maj(a, b, c)
|
||||
|
||||
/* Adjusted round function for rotating state */
|
||||
#define RNDr(S, W, i, ii) \
|
||||
RND(S[(64 - i) % 8], S[(65 - i) % 8], \
|
||||
S[(66 - i) % 8], S[(67 - i) % 8], \
|
||||
S[(68 - i) % 8], S[(69 - i) % 8], \
|
||||
S[(70 - i) % 8], S[(71 - i) % 8], \
|
||||
W[i + ii] + Krnd[i + ii])
|
||||
|
||||
/* Message schedule computation */
|
||||
#define SHR32(x, n) (_mm_srli_epi32(x, n))
|
||||
#define ROTR32(x, n) (_mm_or_si128(SHR32(x, n), _mm_slli_epi32(x, (32-n))))
|
||||
#define s0_128(x) _mm_xor_si128(_mm_xor_si128( \
|
||||
ROTR32(x, 7), ROTR32(x, 18)), SHR32(x, 3))
|
||||
|
||||
static inline __m128i
|
||||
s1_128_high(__m128i a)
|
||||
{
|
||||
__m128i b;
|
||||
__m128i c;
|
||||
|
||||
/* ROTR, loading data as {B, B, A, A}; lanes 1 & 3 will be junk. */
|
||||
b = _mm_shuffle_epi32(a, _MM_SHUFFLE(1, 1, 0, 0));
|
||||
c = _mm_xor_si128(_mm_srli_epi64(b, 17), _mm_srli_epi64(b, 19));
|
||||
|
||||
/* Shift and XOR with rotated data; lanes 1 & 3 will be junk. */
|
||||
c = _mm_xor_si128(c, _mm_srli_epi32(b, 10));
|
||||
|
||||
/* Shuffle good data back and zero unwanted lanes. */
|
||||
c = _mm_shuffle_epi32(c, _MM_SHUFFLE(2, 0, 2, 0));
|
||||
c = _mm_slli_si128(c, 8);
|
||||
|
||||
return (c);
|
||||
}
|
||||
|
||||
static inline __m128i
|
||||
s1_128_low(__m128i a)
|
||||
{
|
||||
__m128i b;
|
||||
__m128i c;
|
||||
|
||||
/* ROTR, loading data as {B, B, A, A}; lanes 1 & 3 will be junk. */
|
||||
b = _mm_shuffle_epi32(a, _MM_SHUFFLE(3, 3, 2, 2));
|
||||
c = _mm_xor_si128(_mm_srli_epi64(b, 17), _mm_srli_epi64(b, 19));
|
||||
|
||||
/* Shift and XOR with rotated data; lanes 1 & 3 will be junk. */
|
||||
c = _mm_xor_si128(c, _mm_srli_epi32(b, 10));
|
||||
|
||||
/* Shuffle good data back and zero unwanted lanes. */
|
||||
c = _mm_shuffle_epi32(c, _MM_SHUFFLE(2, 0, 2, 0));
|
||||
c = _mm_srli_si128(c, 8);
|
||||
|
||||
return (c);
|
||||
}
|
||||
|
||||
/**
|
||||
* SPAN_ONE_THREE(a, b):
|
||||
* Combine the upper three words of ${a} with the lowest word of ${b}. This
|
||||
* could also be thought of returning bits [159:32] of the 256-bit value
|
||||
* consisting of (b[127:0] a[127:0]). In other words, set:
|
||||
* dst[31:0] := a[63:32]
|
||||
* dst[63:32] := a[95:64]
|
||||
* dst[95:64] := a[127:96]
|
||||
* dst[127:96] := b[31:0]
|
||||
*/
|
||||
#define SPAN_ONE_THREE(a, b) (_mm_shuffle_epi32(_mm_castps_si128( \
|
||||
_mm_move_ss(_mm_castsi128_ps(a), _mm_castsi128_ps(b))), \
|
||||
_MM_SHUFFLE(0, 3, 2, 1)))
|
||||
|
||||
/**
|
||||
* MSG4(X0, X1, X2, X3):
|
||||
* Calculate the next four values of the message schedule. If we define
|
||||
* ${W[j]} as the first unknown value in the message schedule, then the input
|
||||
* arguments are:
|
||||
* X0 = W[j - 16] : W[j - 13]
|
||||
* X1 = W[j - 12] : W[j - 9]
|
||||
* X2 = W[j - 8] : W[j - 5]
|
||||
* X3 = W[j - 4] : W[j - 1]
|
||||
* This function therefore calculates:
|
||||
* X4 = W[j + 0] : W[j + 3]
|
||||
*/
|
||||
static inline __m128i
|
||||
MSG4(__m128i X0, __m128i X1, __m128i X2, __m128i X3)
|
||||
{
|
||||
__m128i X4;
|
||||
__m128i Xj_minus_seven, Xj_minus_fifteen;
|
||||
|
||||
/* Set up variables which span X values. */
|
||||
Xj_minus_seven = SPAN_ONE_THREE(X2, X3);
|
||||
Xj_minus_fifteen = SPAN_ONE_THREE(X0, X1);
|
||||
|
||||
/* Begin computing X4. */
|
||||
X4 = _mm_add_epi32(X0, Xj_minus_seven);
|
||||
X4 = _mm_add_epi32(X4, s0_128(Xj_minus_fifteen));
|
||||
|
||||
/* First half of s1. */
|
||||
X4 = _mm_add_epi32(X4, s1_128_low(X3));
|
||||
|
||||
/* Second half of s1; this depends on the above value of X4. */
|
||||
X4 = _mm_add_epi32(X4, s1_128_high(X4));
|
||||
|
||||
return (X4);
|
||||
}
|
||||
|
||||
/**
|
||||
* SHA256_Transform_sse2(state, block, W, S):
|
||||
* Compute the SHA256 block compression function, transforming ${state} using
|
||||
* the data in ${block}. This implementation uses x86 SSE2 instructions, and
|
||||
* should only be used if _SSE2 is defined and cpusupport_x86_sse2() returns
|
||||
* nonzero. The arrays W and S may be filled with sensitive data, and should
|
||||
* be cleared by the callee.
|
||||
*/
|
||||
void
|
||||
SHA256_Transform_sse2(uint32_t state[PHP_STATIC_RESTRICT 8],
|
||||
const uint8_t block[PHP_STATIC_RESTRICT 64], uint32_t W[PHP_STATIC_RESTRICT 64],
|
||||
uint32_t S[PHP_STATIC_RESTRICT 8])
|
||||
{
|
||||
__m128i Y[4];
|
||||
int i;
|
||||
|
||||
/* 1. Prepare the first part of the message schedule W. */
|
||||
Y[0] = mm_bswap_epi32(_mm_loadu_si128((const __m128i *)&block[0]));
|
||||
_mm_storeu_si128((__m128i *)&W[0], Y[0]);
|
||||
Y[1] = mm_bswap_epi32(_mm_loadu_si128((const __m128i *)&block[16]));
|
||||
_mm_storeu_si128((__m128i *)&W[4], Y[1]);
|
||||
Y[2] = mm_bswap_epi32(_mm_loadu_si128((const __m128i *)&block[32]));
|
||||
_mm_storeu_si128((__m128i *)&W[8], Y[2]);
|
||||
Y[3] = mm_bswap_epi32(_mm_loadu_si128((const __m128i *)&block[48]));
|
||||
_mm_storeu_si128((__m128i *)&W[12], Y[3]);
|
||||
|
||||
/* 2. Initialize working variables. */
|
||||
memcpy(S, state, 32);
|
||||
|
||||
/* 3. Mix. */
|
||||
for (i = 0; i < 64; i += 16) {
|
||||
RNDr(S, W, 0, i);
|
||||
RNDr(S, W, 1, i);
|
||||
RNDr(S, W, 2, i);
|
||||
RNDr(S, W, 3, i);
|
||||
RNDr(S, W, 4, i);
|
||||
RNDr(S, W, 5, i);
|
||||
RNDr(S, W, 6, i);
|
||||
RNDr(S, W, 7, i);
|
||||
RNDr(S, W, 8, i);
|
||||
RNDr(S, W, 9, i);
|
||||
RNDr(S, W, 10, i);
|
||||
RNDr(S, W, 11, i);
|
||||
RNDr(S, W, 12, i);
|
||||
RNDr(S, W, 13, i);
|
||||
RNDr(S, W, 14, i);
|
||||
RNDr(S, W, 15, i);
|
||||
|
||||
if (i == 48)
|
||||
break;
|
||||
Y[0] = MSG4(Y[0], Y[1], Y[2], Y[3]);
|
||||
_mm_storeu_si128((__m128i *)&W[16 + i + 0], Y[0]);
|
||||
Y[1] = MSG4(Y[1], Y[2], Y[3], Y[0]);
|
||||
_mm_storeu_si128((__m128i *)&W[16 + i + 4], Y[1]);
|
||||
Y[2] = MSG4(Y[2], Y[3], Y[0], Y[1]);
|
||||
_mm_storeu_si128((__m128i *)&W[16 + i + 8], Y[2]);
|
||||
Y[3] = MSG4(Y[3], Y[0], Y[1], Y[2]);
|
||||
_mm_storeu_si128((__m128i *)&W[16 + i + 12], Y[3]);
|
||||
}
|
||||
|
||||
/* 4. Mix local working variables into global state. */
|
||||
for (i = 0; i < 8; i++)
|
||||
state[i] += S[i];
|
||||
}
|
||||
|
||||
#endif
|
@ -45,6 +45,27 @@ typedef struct {
|
||||
#define PHP_SHA256Init(ctx) PHP_SHA256InitArgs(ctx, NULL)
|
||||
PHP_HASH_API void PHP_SHA256InitArgs(PHP_SHA256_CTX *, ZEND_ATTRIBUTE_UNUSED HashTable *);
|
||||
PHP_HASH_API void PHP_SHA256Update(PHP_SHA256_CTX *, const unsigned char *, size_t);
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# define PHP_STATIC_RESTRICT
|
||||
#else
|
||||
# define PHP_STATIC_RESTRICT static restrict
|
||||
#endif
|
||||
|
||||
#if defined(__SSE2__)
|
||||
void SHA256_Transform_sse2(uint32_t state[PHP_STATIC_RESTRICT 8], const uint8_t block[PHP_STATIC_RESTRICT 64], uint32_t W[PHP_STATIC_RESTRICT 64], uint32_t S[PHP_STATIC_RESTRICT 8]);
|
||||
#endif
|
||||
|
||||
#if (defined(__i386__) || defined(__x86_64__)) && defined(HAVE_IMMINTRIN_H)
|
||||
# if defined(__SSSE3__) && defined(__SHA__)
|
||||
# define PHP_HASH_INTRIN_SHA_NATIVE 1
|
||||
# elif defined(HAVE_FUNC_ATTRIBUTE_TARGET)
|
||||
# define PHP_HASH_INTRIN_SHA_RESOLVER 1
|
||||
# endif
|
||||
|
||||
void SHA256_Transform_shani(uint32_t state[PHP_STATIC_RESTRICT 8], const uint8_t block[PHP_STATIC_RESTRICT 64]);
|
||||
#endif
|
||||
|
||||
PHP_HASH_API void PHP_SHA256Final(unsigned char[32], PHP_SHA256_CTX *);
|
||||
|
||||
/* SHA384 context */
|
||||
|
Loading…
Reference in New Issue
Block a user