mirror of
https://github.com/php/php-src.git
synced 2024-12-04 23:34:25 +08:00
X86: Fast CRC32 computation using PCLMULQDQ instruction
Based on: "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ Instruction" V. Gopal, E. Ozturk, et al., 2009, http://intel.ly/2ySEwL0 Signed-off-by: Frank Du <frank.du@intel.com> Closes GH-6018
This commit is contained in:
parent
cb284f668c
commit
c3299d7dab
@ -159,6 +159,17 @@ static zend_always_inline int zend_cpu_supports_sse42() {
|
|||||||
return __builtin_cpu_supports("sse4.2");
|
return __builtin_cpu_supports("sse4.2");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* __builtin_cpu_supports has pclmul from gcc9 */
|
||||||
|
#if (!defined(__GNUC__) || (ZEND_GCC_VERSION >= 9000))
|
||||||
|
ZEND_NO_SANITIZE_ADDRESS
|
||||||
|
static zend_always_inline int zend_cpu_supports_pclmul() {
|
||||||
|
#if PHP_HAVE_BUILTIN_CPU_INIT
|
||||||
|
__builtin_cpu_init();
|
||||||
|
#endif
|
||||||
|
return __builtin_cpu_supports("pclmul");
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
ZEND_NO_SANITIZE_ADDRESS
|
ZEND_NO_SANITIZE_ADDRESS
|
||||||
static zend_always_inline int zend_cpu_supports_avx() {
|
static zend_always_inline int zend_cpu_supports_avx() {
|
||||||
#if PHP_HAVE_BUILTIN_CPU_INIT
|
#if PHP_HAVE_BUILTIN_CPU_INIT
|
||||||
@ -196,6 +207,10 @@ static zend_always_inline int zend_cpu_supports_sse42() {
|
|||||||
return zend_cpu_supports(ZEND_CPU_FEATURE_SSE42);
|
return zend_cpu_supports(ZEND_CPU_FEATURE_SSE42);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static zend_always_inline int zend_cpu_supports_pclmul() {
|
||||||
|
return zend_cpu_supports(ZEND_CPU_FEATURE_PCLMULQDQ);
|
||||||
|
}
|
||||||
|
|
||||||
static zend_always_inline int zend_cpu_supports_avx() {
|
static zend_always_inline int zend_cpu_supports_avx() {
|
||||||
return zend_cpu_supports(ZEND_CPU_FEATURE_AVX);
|
return zend_cpu_supports(ZEND_CPU_FEATURE_AVX);
|
||||||
}
|
}
|
||||||
|
@ -487,6 +487,10 @@ extern "C++" {
|
|||||||
# define PHP_HAVE_SSE4_2
|
# define PHP_HAVE_SSE4_2
|
||||||
# endif
|
# endif
|
||||||
|
|
||||||
|
# if defined(HAVE_WMMINTRIN_H)
|
||||||
|
# define PHP_HAVE_PCLMUL
|
||||||
|
# endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* AVX2 support was added in gcc 4.7, but AVX2 intrinsics don't work in
|
* AVX2 support was added in gcc 4.7, but AVX2 intrinsics don't work in
|
||||||
* __attribute__((target("avx2"))) functions until gcc 4.9.
|
* __attribute__((target("avx2"))) functions until gcc 4.9.
|
||||||
@ -547,6 +551,58 @@ extern "C++" {
|
|||||||
# define ZEND_INTRIN_SSE4_2_FUNC_DECL(func)
|
# define ZEND_INTRIN_SSE4_2_FUNC_DECL(func)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef __PCLMUL__
|
||||||
|
/* Instructions compiled directly. */
|
||||||
|
# define ZEND_INTRIN_PCLMUL_NATIVE 1
|
||||||
|
#elif (defined(HAVE_FUNC_ATTRIBUTE_TARGET) && defined(PHP_HAVE_PCLMUL)) || defined(ZEND_WIN32)
|
||||||
|
/* Function resolved by ifunc or MINIT. */
|
||||||
|
# define ZEND_INTRIN_PCLMUL_RESOLVER 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Do not use for conditional declaration of API functions! */
|
||||||
|
#if defined(ZEND_INTRIN_PCLMUL_RESOLVER) && defined(ZEND_INTRIN_HAVE_IFUNC_TARGET) && (!defined(__GNUC__) || (ZEND_GCC_VERSION >= 9000))
|
||||||
|
/* __builtin_cpu_supports has pclmul from gcc9 */
|
||||||
|
# define ZEND_INTRIN_PCLMUL_FUNC_PROTO 1
|
||||||
|
#elif defined(ZEND_INTRIN_PCLMUL_RESOLVER)
|
||||||
|
# define ZEND_INTRIN_PCLMUL_FUNC_PTR 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef ZEND_INTRIN_PCLMUL_RESOLVER
|
||||||
|
# ifdef HAVE_FUNC_ATTRIBUTE_TARGET
|
||||||
|
# define ZEND_INTRIN_PCLMUL_FUNC_DECL(func) ZEND_API func __attribute__((target("pclmul")))
|
||||||
|
# else
|
||||||
|
# define ZEND_INTRIN_PCLMUL_FUNC_DECL(func) func
|
||||||
|
# endif
|
||||||
|
#else
|
||||||
|
# define ZEND_INTRIN_PCLMUL_FUNC_DECL(func)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(ZEND_INTRIN_SSE4_2_NATIVE) && defined(ZEND_INTRIN_PCLMUL_NATIVE)
|
||||||
|
/* Instructions compiled directly. */
|
||||||
|
# define ZEND_INTRIN_SSE4_2_PCLMUL_NATIVE 1
|
||||||
|
#elif (defined(HAVE_FUNC_ATTRIBUTE_TARGET) && defined(PHP_HAVE_SSE4_2) && defined(PHP_HAVE_PCLMUL)) || defined(ZEND_WIN32)
|
||||||
|
/* Function resolved by ifunc or MINIT. */
|
||||||
|
# define ZEND_INTRIN_SSE4_2_PCLMUL_RESOLVER 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Do not use for conditional declaration of API functions! */
|
||||||
|
#if defined(ZEND_INTRIN_SSE4_2_PCLMUL_RESOLVER) && defined(ZEND_INTRIN_HAVE_IFUNC_TARGET) && (!defined(__GNUC__) || (ZEND_GCC_VERSION >= 9000))
|
||||||
|
/* __builtin_cpu_supports has pclmul from gcc9 */
|
||||||
|
# define ZEND_INTRIN_SSE4_2_PCLMUL_FUNC_PROTO 1
|
||||||
|
#elif defined(ZEND_INTRIN_SSE4_2_PCLMUL_RESOLVER)
|
||||||
|
# define ZEND_INTRIN_SSE4_2_PCLMUL_FUNC_PTR 1
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef ZEND_INTRIN_SSE4_2_PCLMUL_RESOLVER
|
||||||
|
# ifdef HAVE_FUNC_ATTRIBUTE_TARGET
|
||||||
|
# define ZEND_INTRIN_SSE4_2_PCLMUL_FUNC_DECL(func) ZEND_API func __attribute__((target("sse4.2,pclmul")))
|
||||||
|
# else
|
||||||
|
# define ZEND_INTRIN_SSE4_2_PCLMUL_FUNC_DECL(func) func
|
||||||
|
# endif
|
||||||
|
#else
|
||||||
|
# define ZEND_INTRIN_SSE4_2_PCLMUL_FUNC_DECL(func)
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef __AVX2__
|
#ifdef __AVX2__
|
||||||
# define ZEND_INTRIN_AVX2_NATIVE 1
|
# define ZEND_INTRIN_AVX2_NATIVE 1
|
||||||
#elif (defined(HAVE_FUNC_ATTRIBUTE_TARGET) && defined(PHP_HAVE_AVX2)) || defined(ZEND_WIN32)
|
#elif (defined(HAVE_FUNC_ATTRIBUTE_TARGET) && defined(PHP_HAVE_AVX2)) || defined(ZEND_WIN32)
|
||||||
|
@ -409,6 +409,7 @@ sys/ipc.h \
|
|||||||
dlfcn.h \
|
dlfcn.h \
|
||||||
tmmintrin.h \
|
tmmintrin.h \
|
||||||
nmmintrin.h \
|
nmmintrin.h \
|
||||||
|
wmmintrin.h \
|
||||||
immintrin.h
|
immintrin.h
|
||||||
],[],[],[
|
],[],[],[
|
||||||
#ifdef HAVE_SYS_PARAM_H
|
#ifdef HAVE_SYS_PARAM_H
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
#include "php_hash.h"
|
#include "php_hash.h"
|
||||||
#include "php_hash_crc32.h"
|
#include "php_hash_crc32.h"
|
||||||
#include "php_hash_crc32_tables.h"
|
#include "php_hash_crc32_tables.h"
|
||||||
|
#include "ext/standard/crc32_x86.h"
|
||||||
|
|
||||||
PHP_HASH_API void PHP_CRC32Init(PHP_CRC32_CTX *context)
|
PHP_HASH_API void PHP_CRC32Init(PHP_CRC32_CTX *context)
|
||||||
{
|
{
|
||||||
@ -26,27 +27,39 @@ PHP_HASH_API void PHP_CRC32Init(PHP_CRC32_CTX *context)
|
|||||||
|
|
||||||
PHP_HASH_API void PHP_CRC32Update(PHP_CRC32_CTX *context, const unsigned char *input, size_t len)
|
PHP_HASH_API void PHP_CRC32Update(PHP_CRC32_CTX *context, const unsigned char *input, size_t len)
|
||||||
{
|
{
|
||||||
size_t i;
|
size_t i = 0;
|
||||||
|
|
||||||
for (i = 0; i < len; ++i) {
|
#if ZEND_INTRIN_SSE4_2_PCLMUL_NATIVE || ZEND_INTRIN_SSE4_2_PCLMUL_RESOLVER
|
||||||
|
i += crc32_x86_simd_update(X86_CRC32, &context->state, input, len);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for (; i < len; ++i) {
|
||||||
context->state = (context->state << 8) ^ crc32_table[(context->state >> 24) ^ (input[i] & 0xff)];
|
context->state = (context->state << 8) ^ crc32_table[(context->state >> 24) ^ (input[i] & 0xff)];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
PHP_HASH_API void PHP_CRC32BUpdate(PHP_CRC32_CTX *context, const unsigned char *input, size_t len)
|
PHP_HASH_API void PHP_CRC32BUpdate(PHP_CRC32_CTX *context, const unsigned char *input, size_t len)
|
||||||
{
|
{
|
||||||
size_t i;
|
size_t i = 0;
|
||||||
|
|
||||||
for (i = 0; i < len; ++i) {
|
#if ZEND_INTRIN_SSE4_2_PCLMUL_NATIVE || ZEND_INTRIN_SSE4_2_PCLMUL_RESOLVER
|
||||||
|
i += crc32_x86_simd_update(X86_CRC32B, &context->state, input, len);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for (; i < len; ++i) {
|
||||||
context->state = (context->state >> 8) ^ crc32b_table[(context->state ^ input[i]) & 0xff];
|
context->state = (context->state >> 8) ^ crc32b_table[(context->state ^ input[i]) & 0xff];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
PHP_HASH_API void PHP_CRC32CUpdate(PHP_CRC32_CTX *context, const unsigned char *input, size_t len)
|
PHP_HASH_API void PHP_CRC32CUpdate(PHP_CRC32_CTX *context, const unsigned char *input, size_t len)
|
||||||
{
|
{
|
||||||
size_t i;
|
size_t i = 0;
|
||||||
|
|
||||||
for (i = 0; i < len; ++i) {
|
#if ZEND_INTRIN_SSE4_2_PCLMUL_NATIVE || ZEND_INTRIN_SSE4_2_PCLMUL_RESOLVER
|
||||||
|
i += crc32_x86_simd_update(X86_CRC32C, &context->state, input, len);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for (; i < len; ++i) {
|
||||||
context->state = (context->state >> 8) ^ crc32c_table[(context->state ^ input[i]) & 0xff];
|
context->state = (context->state >> 8) ^ crc32c_table[(context->state ^ input[i]) & 0xff];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -10,6 +10,20 @@ echo hash('crc32', 'message digest'), "\n";
|
|||||||
echo hash('crc32', 'abcdefghijklmnopqrstuvwxyz'), "\n";
|
echo hash('crc32', 'abcdefghijklmnopqrstuvwxyz'), "\n";
|
||||||
echo hash('crc32', 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'), "\n";
|
echo hash('crc32', 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'), "\n";
|
||||||
echo hash('crc32', '12345678901234567890123456789012345678901234567890123456789012345678901234567890'), "\n";
|
echo hash('crc32', '12345678901234567890123456789012345678901234567890123456789012345678901234567890'), "\n";
|
||||||
|
echo hash('crc32', '1234567890123456'), "\n";
|
||||||
|
echo hash('crc32', '1234567890123456abc'), "\n";
|
||||||
|
echo hash('crc32', '12345678901234561234567890123456'), "\n";
|
||||||
|
echo hash('crc32', '12345678901234561234567890123456abc'), "\n";
|
||||||
|
echo hash('crc32', '123456789012345612345678901234561234567890123456'), "\n";
|
||||||
|
echo hash('crc32', '123456789012345612345678901234561234567890123456abc'), "\n";
|
||||||
|
echo hash('crc32', '1234567890123456123456789012345612345678901234561234567890123456'), "\n";
|
||||||
|
echo hash('crc32', '1234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
|
||||||
|
echo hash('crc32', '12345678901234561234567890123456123456789012345612345678901234561234567890123456'), "\n";
|
||||||
|
echo hash('crc32', '12345678901234561234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
|
||||||
|
echo hash('crc32', '12345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456'), "\n";
|
||||||
|
echo hash('crc32', '12345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
|
||||||
|
echo hash('crc32', '123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456'), "\n";
|
||||||
|
echo hash('crc32', '123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
|
||||||
|
|
||||||
echo "crc32b\n";
|
echo "crc32b\n";
|
||||||
echo hash('crc32b', ''), "\n";
|
echo hash('crc32b', ''), "\n";
|
||||||
@ -19,6 +33,20 @@ echo hash('crc32b', 'message digest'), "\n";
|
|||||||
echo hash('crc32b', 'abcdefghijklmnopqrstuvwxyz'), "\n";
|
echo hash('crc32b', 'abcdefghijklmnopqrstuvwxyz'), "\n";
|
||||||
echo hash('crc32b', 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'), "\n";
|
echo hash('crc32b', 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'), "\n";
|
||||||
echo hash('crc32b', '12345678901234567890123456789012345678901234567890123456789012345678901234567890'), "\n";
|
echo hash('crc32b', '12345678901234567890123456789012345678901234567890123456789012345678901234567890'), "\n";
|
||||||
|
echo hash('crc32b', '1234567890123456'), "\n";
|
||||||
|
echo hash('crc32b', '1234567890123456abc'), "\n";
|
||||||
|
echo hash('crc32b', '12345678901234561234567890123456'), "\n";
|
||||||
|
echo hash('crc32b', '12345678901234561234567890123456abc'), "\n";
|
||||||
|
echo hash('crc32b', '123456789012345612345678901234561234567890123456'), "\n";
|
||||||
|
echo hash('crc32b', '123456789012345612345678901234561234567890123456abc'), "\n";
|
||||||
|
echo hash('crc32b', '1234567890123456123456789012345612345678901234561234567890123456'), "\n";
|
||||||
|
echo hash('crc32b', '1234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
|
||||||
|
echo hash('crc32b', '12345678901234561234567890123456123456789012345612345678901234561234567890123456'), "\n";
|
||||||
|
echo hash('crc32b', '12345678901234561234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
|
||||||
|
echo hash('crc32b', '12345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456'), "\n";
|
||||||
|
echo hash('crc32b', '12345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
|
||||||
|
echo hash('crc32b', '123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456'), "\n";
|
||||||
|
echo hash('crc32b', '123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
|
||||||
|
|
||||||
echo "crc32c\n";
|
echo "crc32c\n";
|
||||||
echo hash('crc32c', ''), "\n";
|
echo hash('crc32c', ''), "\n";
|
||||||
@ -59,6 +87,20 @@ echo hash('crc32c', "Even if I could be Shakespeare, I think I should still choo
|
|||||||
echo hash('crc32c', "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"), "\n";
|
echo hash('crc32c', "The fugacity of a constituent in a mixture of gases at a given temperature is proportional to its mole fraction. Lewis-Randall Rule"), "\n";
|
||||||
echo hash('crc32c', "How can you write a big system without C++? -Paul Glick"), "\n";
|
echo hash('crc32c', "How can you write a big system without C++? -Paul Glick"), "\n";
|
||||||
echo hash('crc32c', "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#\$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"), "\n";
|
echo hash('crc32c', "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n\v\f\r\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f !\"#\$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\x7f\x80\x81\x82\x83\x84\x85\x86\x87\x88\x89\x8a\x8b\x8c\x8d\x8e\x8f\x90\x91\x92\x93\x94\x95\x96\x97\x98\x99\x9a\x9b\x9c\x9d\x9e\x9f\xa0\xa1\xa2\xa3\xa4\xa5\xa6\xa7\xa8\xa9\xaa\xab\xac\xad\xae\xaf\xb0\xb1\xb2\xb3\xb4\xb5\xb6\xb7\xb8\xb9\xba\xbb\xbc\xbd\xbe\xbf\xc0\xc1\xc2\xc3\xc4\xc5\xc6\xc7\xc8\xc9\xca\xcb\xcc\xcd\xce\xcf\xd0\xd1\xd2\xd3\xd4\xd5\xd6\xd7\xd8\xd9\xda\xdb\xdc\xdd\xde\xdf\xe0\xe1\xe2\xe3\xe4\xe5\xe6\xe7\xe8\xe9\xea\xeb\xec\xed\xee\xef\xf0\xf1\xf2\xf3\xf4\xf5\xf6\xf7\xf8\xf9\xfa\xfb\xfc\xfd\xfe\xff"), "\n";
|
||||||
|
echo hash('crc32c', '1234567890123456'), "\n";
|
||||||
|
echo hash('crc32c', '1234567890123456abc'), "\n";
|
||||||
|
echo hash('crc32c', '12345678901234561234567890123456'), "\n";
|
||||||
|
echo hash('crc32c', '12345678901234561234567890123456abc'), "\n";
|
||||||
|
echo hash('crc32c', '123456789012345612345678901234561234567890123456'), "\n";
|
||||||
|
echo hash('crc32c', '123456789012345612345678901234561234567890123456abc'), "\n";
|
||||||
|
echo hash('crc32c', '1234567890123456123456789012345612345678901234561234567890123456'), "\n";
|
||||||
|
echo hash('crc32c', '1234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
|
||||||
|
echo hash('crc32c', '12345678901234561234567890123456123456789012345612345678901234561234567890123456'), "\n";
|
||||||
|
echo hash('crc32c', '12345678901234561234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
|
||||||
|
echo hash('crc32c', '12345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456'), "\n";
|
||||||
|
echo hash('crc32c', '12345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
|
||||||
|
echo hash('crc32c', '123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456'), "\n";
|
||||||
|
echo hash('crc32c', '123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456abc'), "\n";
|
||||||
|
|
||||||
?>
|
?>
|
||||||
--EXPECT--
|
--EXPECT--
|
||||||
@ -70,6 +112,20 @@ crc32
|
|||||||
9693bf77
|
9693bf77
|
||||||
882174a0
|
882174a0
|
||||||
96790816
|
96790816
|
||||||
|
98b0e78d
|
||||||
|
a6f33d71
|
||||||
|
900a1d38
|
||||||
|
396978fe
|
||||||
|
adfc6afe
|
||||||
|
d3ef9388
|
||||||
|
c53911dc
|
||||||
|
37006f1b
|
||||||
|
4a54af3a
|
||||||
|
98d05c71
|
||||||
|
5a26f5b4
|
||||||
|
b9108715
|
||||||
|
cc684112
|
||||||
|
b2ac45af
|
||||||
crc32b
|
crc32b
|
||||||
00000000
|
00000000
|
||||||
e8b7be43
|
e8b7be43
|
||||||
@ -78,6 +134,20 @@ e8b7be43
|
|||||||
4c2750bd
|
4c2750bd
|
||||||
1fc2e6d2
|
1fc2e6d2
|
||||||
7ca94a72
|
7ca94a72
|
||||||
|
1e5fcdb7
|
||||||
|
70b54c2f
|
||||||
|
094fb11e
|
||||||
|
38210c49
|
||||||
|
7399c6ef
|
||||||
|
83e98d04
|
||||||
|
1f26a94e
|
||||||
|
e2e8634a
|
||||||
|
0642542d
|
||||||
|
43b42c9b
|
||||||
|
262e1ded
|
||||||
|
b7a463c4
|
||||||
|
dfa1bbae
|
||||||
|
4022d57a
|
||||||
crc32c
|
crc32c
|
||||||
00000000
|
00000000
|
||||||
c1d04330
|
c1d04330
|
||||||
@ -117,3 +187,17 @@ de2e65c5
|
|||||||
66ed1d8b
|
66ed1d8b
|
||||||
dcded527
|
dcded527
|
||||||
9c44184b
|
9c44184b
|
||||||
|
9aa4287f
|
||||||
|
ab2761c5
|
||||||
|
cd486b4b
|
||||||
|
c19c4a41
|
||||||
|
1ea5b441
|
||||||
|
36d20512
|
||||||
|
31d11ffa
|
||||||
|
65d5bb9e
|
||||||
|
a0e3e317
|
||||||
|
8dc10a7c
|
||||||
|
7ab04135
|
||||||
|
c292a38d
|
||||||
|
e3e558ec
|
||||||
|
b6c5e13e
|
||||||
|
@ -33,6 +33,7 @@
|
|||||||
#include "ext/standard/php_dns.h"
|
#include "ext/standard/php_dns.h"
|
||||||
#include "ext/standard/php_uuencode.h"
|
#include "ext/standard/php_uuencode.h"
|
||||||
#include "ext/standard/php_mt_rand.h"
|
#include "ext/standard/php_mt_rand.h"
|
||||||
|
#include "ext/standard/crc32_x86.h"
|
||||||
|
|
||||||
#ifdef PHP_WIN32
|
#ifdef PHP_WIN32
|
||||||
#include "win32/php_win32_globals.h"
|
#include "win32/php_win32_globals.h"
|
||||||
@ -363,6 +364,10 @@ PHP_MINIT_FUNCTION(basic) /* {{{ */
|
|||||||
BASIC_MINIT_SUBMODULE(string_intrin)
|
BASIC_MINIT_SUBMODULE(string_intrin)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if ZEND_INTRIN_SSE4_2_PCLMUL_FUNC_PTR
|
||||||
|
BASIC_MINIT_SUBMODULE(crc32_x86_intrin)
|
||||||
|
#endif
|
||||||
|
|
||||||
#if ZEND_INTRIN_AVX2_FUNC_PTR || ZEND_INTRIN_SSSE3_FUNC_PTR
|
#if ZEND_INTRIN_AVX2_FUNC_PTR || ZEND_INTRIN_SSSE3_FUNC_PTR
|
||||||
BASIC_MINIT_SUBMODULE(base64_intrin)
|
BASIC_MINIT_SUBMODULE(base64_intrin)
|
||||||
#endif
|
#endif
|
||||||
|
@ -449,7 +449,7 @@ PHP_NEW_EXTENSION(standard, array.c base64.c basic_functions.c browscap.c crc32.
|
|||||||
http_fopen_wrapper.c php_fopen_wrapper.c credits.c css.c \
|
http_fopen_wrapper.c php_fopen_wrapper.c credits.c css.c \
|
||||||
var_unserializer.c ftok.c sha1.c user_filters.c uuencode.c \
|
var_unserializer.c ftok.c sha1.c user_filters.c uuencode.c \
|
||||||
filters.c proc_open.c streamsfuncs.c http.c password.c \
|
filters.c proc_open.c streamsfuncs.c http.c password.c \
|
||||||
random.c net.c hrtime.c,,,
|
random.c net.c hrtime.c crc32_x86.c,,,
|
||||||
-DZEND_ENABLE_STATIC_TSRMLS_CACHE=1)
|
-DZEND_ENABLE_STATIC_TSRMLS_CACHE=1)
|
||||||
|
|
||||||
PHP_ADD_MAKEFILE_FRAGMENT
|
PHP_ADD_MAKEFILE_FRAGMENT
|
||||||
|
@ -25,7 +25,7 @@ ADD_FLAG("LIBS_STANDARD", "iphlpapi.lib");
|
|||||||
|
|
||||||
EXTENSION("standard", "array.c base64.c basic_functions.c browscap.c \
|
EXTENSION("standard", "array.c base64.c basic_functions.c browscap.c \
|
||||||
crc32.c crypt.c crypt_freesec.c crypt_blowfish.c crypt_sha256.c \
|
crc32.c crypt.c crypt_freesec.c crypt_blowfish.c crypt_sha256.c \
|
||||||
crypt_sha512.c php_crypt_r.c \
|
crypt_sha512.c php_crypt_r.c crc32_x86.c \
|
||||||
datetime.c dir.c dl.c dns.c dns_win32.c exec.c \
|
datetime.c dir.c dl.c dns.c dns_win32.c exec.c \
|
||||||
file.c filestat.c formatted_print.c fsock.c head.c html.c image.c \
|
file.c filestat.c formatted_print.c fsock.c head.c html.c image.c \
|
||||||
info.c iptc.c lcg.c link.c mail.c math.c md5.c metaphone.c microtime.c \
|
info.c iptc.c lcg.c link.c mail.c math.c md5.c metaphone.c microtime.c \
|
||||||
|
@ -17,6 +17,7 @@
|
|||||||
#include "php.h"
|
#include "php.h"
|
||||||
#include "basic_functions.h"
|
#include "basic_functions.h"
|
||||||
#include "crc32.h"
|
#include "crc32.h"
|
||||||
|
#include "crc32_x86.h"
|
||||||
|
|
||||||
#if HAVE_AARCH64_CRC32
|
#if HAVE_AARCH64_CRC32
|
||||||
# include <arm_acle.h>
|
# include <arm_acle.h>
|
||||||
@ -74,7 +75,7 @@ PHP_FUNCTION(crc32)
|
|||||||
char *p;
|
char *p;
|
||||||
size_t nr;
|
size_t nr;
|
||||||
uint32_t crcinit = 0;
|
uint32_t crcinit = 0;
|
||||||
register uint32_t crc;
|
uint32_t crc;
|
||||||
|
|
||||||
ZEND_PARSE_PARAMETERS_START(1, 1)
|
ZEND_PARSE_PARAMETERS_START(1, 1)
|
||||||
Z_PARAM_STRING(p, nr)
|
Z_PARAM_STRING(p, nr)
|
||||||
@ -89,6 +90,12 @@ PHP_FUNCTION(crc32)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if ZEND_INTRIN_SSE4_2_PCLMUL_NATIVE || ZEND_INTRIN_SSE4_2_PCLMUL_RESOLVER
|
||||||
|
size_t nr_simd = crc32_x86_simd_update(X86_CRC32B, &crc, (const unsigned char *)p, nr);
|
||||||
|
nr -= nr_simd;
|
||||||
|
p += nr_simd;
|
||||||
|
#endif
|
||||||
|
|
||||||
for (; nr--; ++p) {
|
for (; nr--; ++p) {
|
||||||
crc = ((crc >> 8) & 0x00FFFFFF) ^ crc32tab[(crc ^ (*p)) & 0xFF ];
|
crc = ((crc >> 8) & 0x00FFFFFF) ^ crc32tab[(crc ^ (*p)) & 0xFF ];
|
||||||
}
|
}
|
||||||
|
349
ext/standard/crc32_x86.c
Normal file
349
ext/standard/crc32_x86.c
Normal file
@ -0,0 +1,349 @@
|
|||||||
|
/*
|
||||||
|
+----------------------------------------------------------------------+
|
||||||
|
| Copyright (c) The PHP Group |
|
||||||
|
+----------------------------------------------------------------------+
|
||||||
|
| This source file is subject to version 3.01 of the PHP license, |
|
||||||
|
| that is bundled with this package in the file LICENSE, and is |
|
||||||
|
| available through the world-wide-web at the following url: |
|
||||||
|
| https://www.php.net/license/3_01.txt |
|
||||||
|
| If you did not receive a copy of the PHP license and are unable to |
|
||||||
|
| obtain it through the world-wide-web, please send a note to |
|
||||||
|
| license@php.net so we can mail you a copy immediately. |
|
||||||
|
+----------------------------------------------------------------------+
|
||||||
|
| Author: Frank Du <frank.du@intel.com> |
|
||||||
|
+----------------------------------------------------------------------+
|
||||||
|
| Compute the crc32 of the buffer. Based on: |
|
||||||
|
| "Fast CRC Computation for Generic Polynomials Using PCLMULQDQ" |
|
||||||
|
| V. Gopal, E. Ozturk, et al., 2009, http://intel.ly/2ySEwL0 |
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "crc32_x86.h"
|
||||||
|
|
||||||
|
#if ZEND_INTRIN_SSE4_2_PCLMUL_NATIVE || ZEND_INTRIN_SSE4_2_PCLMUL_RESOLVER
|
||||||
|
# include <nmmintrin.h>
|
||||||
|
# include <wmmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if ZEND_INTRIN_SSE4_2_PCLMUL_RESOLVER
|
||||||
|
# include "Zend/zend_cpuinfo.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if ZEND_INTRIN_SSE4_2_PCLMUL_NATIVE || ZEND_INTRIN_SSE4_2_PCLMUL_RESOLVER
|
||||||
|
|
||||||
|
typedef struct _crc32_pclmul_bit_consts {
|
||||||
|
uint64_t k1k2[2];
|
||||||
|
uint64_t k3k4[2];
|
||||||
|
uint64_t k5k6[2];
|
||||||
|
uint64_t uPx[2];
|
||||||
|
} crc32_pclmul_consts;
|
||||||
|
|
||||||
|
static const crc32_pclmul_consts crc32_pclmul_consts_maps[X86_CRC32_MAX] = {
|
||||||
|
{ /* X86_CRC32, polynomial: 0x04C11DB7 */
|
||||||
|
{0x00e6228b11, 0x008833794c}, /* endianness swap */
|
||||||
|
{0x00e8a45605, 0x00c5b9cd4c}, /* endianness swap */
|
||||||
|
{0x00490d678d, 0x00f200aa66}, /* endianness swap */
|
||||||
|
{0x0104d101df, 0x0104c11db7}
|
||||||
|
},
|
||||||
|
{ /* X86_CRC32B, polynomial: 0x04C11DB7 with reversed ordering */
|
||||||
|
{0x0154442bd4, 0x01c6e41596},
|
||||||
|
{0x01751997d0, 0x00ccaa009e},
|
||||||
|
{0x0163cd6124, 0x01db710640},
|
||||||
|
{0x01f7011641, 0x01db710641},
|
||||||
|
},
|
||||||
|
{ /* X86_CRC32C, polynomial: 0x1EDC6F41 with reversed ordering */
|
||||||
|
{0x00740eef02, 0x009e4addf8},
|
||||||
|
{0x00f20c0dfe, 0x014cd00bd6},
|
||||||
|
{0x00dd45aab8, 0x0000000000},
|
||||||
|
{0x00dea713f1, 0x0105ec76f0}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
static uint8_t pclmul_shuf_mask_table[16] = {
|
||||||
|
0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08,
|
||||||
|
0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00,
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Folding of 128-bit data chunks */
|
||||||
|
#define CRC32_FOLDING_BLOCK_SIZE (16)
|
||||||
|
|
||||||
|
/* PCLMUL version of non-relfected crc32 */
|
||||||
|
ZEND_INTRIN_SSE4_2_PCLMUL_FUNC_DECL(size_t crc32_pclmul_batch(uint32_t *crc, const unsigned char *p, size_t nr, const crc32_pclmul_consts *consts));
|
||||||
|
size_t crc32_pclmul_batch(uint32_t *crc, const unsigned char *p, size_t nr, const crc32_pclmul_consts *consts)
|
||||||
|
{
|
||||||
|
size_t nr_in = nr;
|
||||||
|
__m128i x0, x1, x2, k, shuf_mask;
|
||||||
|
|
||||||
|
if (nr < CRC32_FOLDING_BLOCK_SIZE) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
shuf_mask = _mm_loadu_si128((__m128i *)(pclmul_shuf_mask_table));
|
||||||
|
x0 = _mm_cvtsi32_si128(*crc);
|
||||||
|
x1 = _mm_loadu_si128((__m128i *)(p + 0x00));
|
||||||
|
x0 = _mm_slli_si128(x0, 12);
|
||||||
|
x1 = _mm_shuffle_epi8(x1, shuf_mask); /* endianness swap */
|
||||||
|
x0 = _mm_xor_si128(x1, x0);
|
||||||
|
p += CRC32_FOLDING_BLOCK_SIZE;
|
||||||
|
nr -= CRC32_FOLDING_BLOCK_SIZE;
|
||||||
|
|
||||||
|
if (nr >= (CRC32_FOLDING_BLOCK_SIZE * 3)) {
|
||||||
|
__m128i x3, x4;
|
||||||
|
|
||||||
|
x1 = _mm_loadu_si128((__m128i *)(p + 0x00));
|
||||||
|
x1 = _mm_shuffle_epi8(x1, shuf_mask); /* endianness swap */
|
||||||
|
x2 = _mm_loadu_si128((__m128i *)(p + 0x10));
|
||||||
|
x2 = _mm_shuffle_epi8(x2, shuf_mask); /* endianness swap */
|
||||||
|
x3 = _mm_loadu_si128((__m128i *)(p + 0x20));
|
||||||
|
x3 = _mm_shuffle_epi8(x3, shuf_mask); /* endianness swap */
|
||||||
|
p += CRC32_FOLDING_BLOCK_SIZE * 3;
|
||||||
|
nr -= CRC32_FOLDING_BLOCK_SIZE * 3;
|
||||||
|
|
||||||
|
k = _mm_loadu_si128((__m128i *)consts->k1k2);
|
||||||
|
/* parallel folding by 4 */
|
||||||
|
while (nr >= (CRC32_FOLDING_BLOCK_SIZE * 4)) {
|
||||||
|
__m128i x5, x6, x7, x8, x9, x10, x11;
|
||||||
|
x4 = _mm_clmulepi64_si128(x0, k, 0x00);
|
||||||
|
x5 = _mm_clmulepi64_si128(x1, k, 0x00);
|
||||||
|
x6 = _mm_clmulepi64_si128(x2, k, 0x00);
|
||||||
|
x7 = _mm_clmulepi64_si128(x3, k, 0x00);
|
||||||
|
x0 = _mm_clmulepi64_si128(x0, k, 0x11);
|
||||||
|
x1 = _mm_clmulepi64_si128(x1, k, 0x11);
|
||||||
|
x2 = _mm_clmulepi64_si128(x2, k, 0x11);
|
||||||
|
x3 = _mm_clmulepi64_si128(x3, k, 0x11);
|
||||||
|
x8 = _mm_loadu_si128((__m128i *)(p + 0x00));
|
||||||
|
x8 = _mm_shuffle_epi8(x8, shuf_mask); /* endianness swap */
|
||||||
|
x9 = _mm_loadu_si128((__m128i *)(p + 0x10));
|
||||||
|
x9 = _mm_shuffle_epi8(x9, shuf_mask); /* endianness swap */
|
||||||
|
x10 = _mm_loadu_si128((__m128i *)(p + 0x20));
|
||||||
|
x10 = _mm_shuffle_epi8(x10, shuf_mask); /* endianness swap */
|
||||||
|
x11 = _mm_loadu_si128((__m128i *)(p + 0x30));
|
||||||
|
x11 = _mm_shuffle_epi8(x11, shuf_mask); /* endianness swap */
|
||||||
|
x0 = _mm_xor_si128(x0, x4);
|
||||||
|
x1 = _mm_xor_si128(x1, x5);
|
||||||
|
x2 = _mm_xor_si128(x2, x6);
|
||||||
|
x3 = _mm_xor_si128(x3, x7);
|
||||||
|
x0 = _mm_xor_si128(x0, x8);
|
||||||
|
x1 = _mm_xor_si128(x1, x9);
|
||||||
|
x2 = _mm_xor_si128(x2, x10);
|
||||||
|
x3 = _mm_xor_si128(x3, x11);
|
||||||
|
|
||||||
|
p += CRC32_FOLDING_BLOCK_SIZE * 4;
|
||||||
|
nr -= CRC32_FOLDING_BLOCK_SIZE * 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
k = _mm_loadu_si128((__m128i *)consts->k3k4);
|
||||||
|
/* fold 4 to 1, [x1, x2, x3] -> x0 */
|
||||||
|
x4 = _mm_clmulepi64_si128(x0, k, 0x00);
|
||||||
|
x0 = _mm_clmulepi64_si128(x0, k, 0x11);
|
||||||
|
x0 = _mm_xor_si128(x0, x1);
|
||||||
|
x0 = _mm_xor_si128(x0, x4);
|
||||||
|
x4 = _mm_clmulepi64_si128(x0, k, 0x00);
|
||||||
|
x0 = _mm_clmulepi64_si128(x0, k, 0x11);
|
||||||
|
x0 = _mm_xor_si128(x0, x2);
|
||||||
|
x0 = _mm_xor_si128(x0, x4);
|
||||||
|
x4 = _mm_clmulepi64_si128(x0, k, 0x00);
|
||||||
|
x0 = _mm_clmulepi64_si128(x0, k, 0x11);
|
||||||
|
x0 = _mm_xor_si128(x0, x3);
|
||||||
|
x0 = _mm_xor_si128(x0, x4);
|
||||||
|
}
|
||||||
|
|
||||||
|
k = _mm_loadu_si128((__m128i *)consts->k3k4);
|
||||||
|
/* folding by 1 */
|
||||||
|
while (nr >= CRC32_FOLDING_BLOCK_SIZE) {
|
||||||
|
/* load next to x2, fold to x0, x1 */
|
||||||
|
x2 = _mm_loadu_si128((__m128i *)(p + 0x00));
|
||||||
|
x2 = _mm_shuffle_epi8(x2, shuf_mask); /* endianness swap */
|
||||||
|
x1 = _mm_clmulepi64_si128(x0, k, 0x00);
|
||||||
|
x0 = _mm_clmulepi64_si128(x0, k, 0x11);
|
||||||
|
x0 = _mm_xor_si128(x0, x2);
|
||||||
|
x0 = _mm_xor_si128(x0, x1);
|
||||||
|
p += CRC32_FOLDING_BLOCK_SIZE;
|
||||||
|
nr -= CRC32_FOLDING_BLOCK_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* reduce 128-bits(final fold) to 96-bits */
|
||||||
|
k = _mm_loadu_si128((__m128i*)consts->k5k6);
|
||||||
|
x1 = _mm_clmulepi64_si128(x0, k, 0x11);
|
||||||
|
x0 = _mm_slli_si128(x0, 8);
|
||||||
|
x0 = _mm_srli_si128(x0, 4);
|
||||||
|
x0 = _mm_xor_si128(x0, x1);
|
||||||
|
/* reduce 96-bits to 64-bits */
|
||||||
|
x1 = _mm_clmulepi64_si128(x0, k, 0x01);
|
||||||
|
x0 = _mm_xor_si128(x0, x1);
|
||||||
|
|
||||||
|
/* barrett reduction */
|
||||||
|
k = _mm_loadu_si128((__m128i*)consts->uPx);
|
||||||
|
x1 = _mm_move_epi64(x0);
|
||||||
|
x1 = _mm_srli_si128(x1, 4);
|
||||||
|
x1 = _mm_clmulepi64_si128(x1, k, 0x00);
|
||||||
|
x1 = _mm_srli_si128(x1, 4);
|
||||||
|
x1 = _mm_clmulepi64_si128(x1, k, 0x10);
|
||||||
|
x0 = _mm_xor_si128(x1, x0);
|
||||||
|
*crc = _mm_extract_epi32(x0, 0);
|
||||||
|
return (nr_in - nr); /* the nr processed */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* PCLMUL version of relfected crc32 */
|
||||||
|
ZEND_INTRIN_SSE4_2_PCLMUL_FUNC_DECL(size_t crc32_pclmul_reflected_batch(uint32_t *crc, const unsigned char *p, size_t nr, const crc32_pclmul_consts *consts));
|
||||||
|
size_t crc32_pclmul_reflected_batch(uint32_t *crc, const unsigned char *p, size_t nr, const crc32_pclmul_consts *consts)
|
||||||
|
{
|
||||||
|
size_t nr_in = nr;
|
||||||
|
__m128i x0, x1, x2, k;
|
||||||
|
|
||||||
|
if (nr < CRC32_FOLDING_BLOCK_SIZE) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
x0 = _mm_loadu_si128((__m128i *)(p + 0x00));
|
||||||
|
x0 = _mm_xor_si128(x0, _mm_cvtsi32_si128(*crc));
|
||||||
|
p += CRC32_FOLDING_BLOCK_SIZE;
|
||||||
|
nr -= CRC32_FOLDING_BLOCK_SIZE;
|
||||||
|
if (nr >= (CRC32_FOLDING_BLOCK_SIZE * 3)) {
|
||||||
|
__m128i x3, x4;
|
||||||
|
|
||||||
|
x1 = _mm_loadu_si128((__m128i *)(p + 0x00));
|
||||||
|
x2 = _mm_loadu_si128((__m128i *)(p + 0x10));
|
||||||
|
x3 = _mm_loadu_si128((__m128i *)(p + 0x20));
|
||||||
|
p += CRC32_FOLDING_BLOCK_SIZE * 3;
|
||||||
|
nr -= CRC32_FOLDING_BLOCK_SIZE * 3;
|
||||||
|
|
||||||
|
k = _mm_loadu_si128((__m128i *)consts->k1k2);
|
||||||
|
/* parallel folding by 4 */
|
||||||
|
while (nr >= (CRC32_FOLDING_BLOCK_SIZE * 4)) {
|
||||||
|
__m128i x5, x6, x7, x8, x9, x10, x11;
|
||||||
|
x4 = _mm_clmulepi64_si128(x0, k, 0x00);
|
||||||
|
x5 = _mm_clmulepi64_si128(x1, k, 0x00);
|
||||||
|
x6 = _mm_clmulepi64_si128(x2, k, 0x00);
|
||||||
|
x7 = _mm_clmulepi64_si128(x3, k, 0x00);
|
||||||
|
x0 = _mm_clmulepi64_si128(x0, k, 0x11);
|
||||||
|
x1 = _mm_clmulepi64_si128(x1, k, 0x11);
|
||||||
|
x2 = _mm_clmulepi64_si128(x2, k, 0x11);
|
||||||
|
x3 = _mm_clmulepi64_si128(x3, k, 0x11);
|
||||||
|
x8 = _mm_loadu_si128((__m128i *)(p + 0x00));
|
||||||
|
x9 = _mm_loadu_si128((__m128i *)(p + 0x10));
|
||||||
|
x10 = _mm_loadu_si128((__m128i *)(p + 0x20));
|
||||||
|
x11 = _mm_loadu_si128((__m128i *)(p + 0x30));
|
||||||
|
x0 = _mm_xor_si128(x0, x4);
|
||||||
|
x1 = _mm_xor_si128(x1, x5);
|
||||||
|
x2 = _mm_xor_si128(x2, x6);
|
||||||
|
x3 = _mm_xor_si128(x3, x7);
|
||||||
|
x0 = _mm_xor_si128(x0, x8);
|
||||||
|
x1 = _mm_xor_si128(x1, x9);
|
||||||
|
x2 = _mm_xor_si128(x2, x10);
|
||||||
|
x3 = _mm_xor_si128(x3, x11);
|
||||||
|
|
||||||
|
p += CRC32_FOLDING_BLOCK_SIZE * 4;
|
||||||
|
nr -= CRC32_FOLDING_BLOCK_SIZE * 4;
|
||||||
|
}
|
||||||
|
|
||||||
|
k = _mm_loadu_si128((__m128i *)consts->k3k4);
|
||||||
|
/* fold 4 to 1, [x1, x2, x3] -> x0 */
|
||||||
|
x4 = _mm_clmulepi64_si128(x0, k, 0x00);
|
||||||
|
x0 = _mm_clmulepi64_si128(x0, k, 0x11);
|
||||||
|
x0 = _mm_xor_si128(x0, x1);
|
||||||
|
x0 = _mm_xor_si128(x0, x4);
|
||||||
|
x4 = _mm_clmulepi64_si128(x0, k, 0x00);
|
||||||
|
x0 = _mm_clmulepi64_si128(x0, k, 0x11);
|
||||||
|
x0 = _mm_xor_si128(x0, x2);
|
||||||
|
x0 = _mm_xor_si128(x0, x4);
|
||||||
|
x4 = _mm_clmulepi64_si128(x0, k, 0x00);
|
||||||
|
x0 = _mm_clmulepi64_si128(x0, k, 0x11);
|
||||||
|
x0 = _mm_xor_si128(x0, x3);
|
||||||
|
x0 = _mm_xor_si128(x0, x4);
|
||||||
|
}
|
||||||
|
|
||||||
|
k = _mm_loadu_si128((__m128i *)consts->k3k4);
|
||||||
|
/* folding by 1 */
|
||||||
|
while (nr >= CRC32_FOLDING_BLOCK_SIZE) {
|
||||||
|
/* load next to x2, fold to x0, x1 */
|
||||||
|
x2 = _mm_loadu_si128((__m128i *)(p + 0x00));
|
||||||
|
x1 = _mm_clmulepi64_si128(x0, k, 0x00);
|
||||||
|
x0 = _mm_clmulepi64_si128(x0, k, 0x11);
|
||||||
|
x0 = _mm_xor_si128(x0, x2);
|
||||||
|
x0 = _mm_xor_si128(x0, x1);
|
||||||
|
p += CRC32_FOLDING_BLOCK_SIZE;
|
||||||
|
nr -= CRC32_FOLDING_BLOCK_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* reduce 128-bits(final fold) to 96-bits */
|
||||||
|
x1 = _mm_clmulepi64_si128(x0, k, 0x10);
|
||||||
|
x0 = _mm_srli_si128(x0, 8);
|
||||||
|
x0 = _mm_xor_si128(x0, x1);
|
||||||
|
/* reduce 96-bits to 64-bits */
|
||||||
|
x1 = _mm_shuffle_epi32(x0, 0xfc);
|
||||||
|
x0 = _mm_shuffle_epi32(x0, 0xf9);
|
||||||
|
k = _mm_loadu_si128((__m128i*)consts->k5k6);
|
||||||
|
x1 = _mm_clmulepi64_si128(x1, k, 0x00);
|
||||||
|
x0 = _mm_xor_si128(x0, x1);
|
||||||
|
|
||||||
|
/* barrett reduction */
|
||||||
|
x1 = _mm_shuffle_epi32(x0, 0xf3);
|
||||||
|
x0 = _mm_slli_si128(x0, 4);
|
||||||
|
k = _mm_loadu_si128((__m128i*)consts->uPx);
|
||||||
|
x1 = _mm_clmulepi64_si128(x1, k, 0x00);
|
||||||
|
x1 = _mm_clmulepi64_si128(x1, k, 0x10);
|
||||||
|
x0 = _mm_xor_si128(x1, x0);
|
||||||
|
*crc = _mm_extract_epi32(x0, 2);
|
||||||
|
return (nr_in - nr); /* the nr processed */
|
||||||
|
}
|
||||||
|
|
||||||
|
# if ZEND_INTRIN_SSE4_2_PCLMUL_NATIVE
|
||||||
|
size_t crc32_x86_simd_update(X86_CRC32_TYPE type, uint32_t *crc, const unsigned char *p, size_t nr)
|
||||||
|
# else /* ZEND_INTRIN_SSE4_2_PCLMUL_RESOLVER */
|
||||||
|
size_t crc32_sse42_pclmul_update(X86_CRC32_TYPE type, uint32_t *crc, const unsigned char *p, size_t nr)
|
||||||
|
# endif
|
||||||
|
{
|
||||||
|
if (type > X86_CRC32_MAX) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
const crc32_pclmul_consts *consts = &crc32_pclmul_consts_maps[type];
|
||||||
|
|
||||||
|
switch (type) {
|
||||||
|
case X86_CRC32:
|
||||||
|
return crc32_pclmul_batch(crc, p, nr, consts);
|
||||||
|
case X86_CRC32B:
|
||||||
|
case X86_CRC32C:
|
||||||
|
return crc32_pclmul_reflected_batch(crc, p, nr, consts);
|
||||||
|
default:
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if ZEND_INTRIN_SSE4_2_PCLMUL_RESOLVER
|
||||||
|
static size_t crc32_x86_simd_update_default(X86_CRC32_TYPE type, uint32_t *crc, const unsigned char *p, size_t nr)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
# if ZEND_INTRIN_SSE4_2_PCLMUL_FUNC_PROTO
|
||||||
|
size_t crc32_x86_simd_update(X86_CRC32_TYPE type, uint32_t *crc, const unsigned char *p, size_t nr) __attribute__((ifunc("resolve_crc32_x86_simd_update")));
|
||||||
|
|
||||||
|
typedef size_t (*crc32_x86_simd_func_t)(X86_CRC32_TYPE type, uint32_t *crc, const unsigned char *p, size_t nr);
|
||||||
|
|
||||||
|
ZEND_NO_SANITIZE_ADDRESS
|
||||||
|
ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
|
||||||
|
static crc32_x86_simd_func_t resolve_crc32_x86_simd_update() {
|
||||||
|
if (zend_cpu_supports_sse42() && zend_cpu_supports_pclmul()) {
|
||||||
|
return crc32_sse42_pclmul_update;
|
||||||
|
}
|
||||||
|
return crc32_x86_simd_update_default;
|
||||||
|
}
|
||||||
|
# else /* ZEND_INTRIN_SSE4_2_PCLMUL_FUNC_PTR */
|
||||||
|
static size_t (*crc32_x86_simd_ptr)(X86_CRC32_TYPE type, uint32_t *crc, const unsigned char *p, size_t nr) = crc32_x86_simd_update_default;
|
||||||
|
|
||||||
|
size_t crc32_x86_simd_update(X86_CRC32_TYPE type, uint32_t *crc, const unsigned char *p, size_t nr) {
|
||||||
|
return crc32_x86_simd_ptr(type, crc, p, nr);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* {{{ PHP_MINIT_FUNCTION */
|
||||||
|
PHP_MINIT_FUNCTION(crc32_x86_intrin)
|
||||||
|
{
|
||||||
|
if (zend_cpu_supports(ZEND_CPU_FEATURE_SSE42) && zend_cpu_supports(ZEND_CPU_FEATURE_PCLMULQDQ)) {
|
||||||
|
crc32_x86_simd_ptr = crc32_sse42_pclmul_update;
|
||||||
|
}
|
||||||
|
return SUCCESS;
|
||||||
|
}
|
||||||
|
/* }}} */
|
||||||
|
# endif
|
||||||
|
#endif
|
52
ext/standard/crc32_x86.h
Normal file
52
ext/standard/crc32_x86.h
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
/*
|
||||||
|
+----------------------------------------------------------------------+
|
||||||
|
| Copyright (c) The PHP Group |
|
||||||
|
+----------------------------------------------------------------------+
|
||||||
|
| This source file is subject to version 3.01 of the PHP license, |
|
||||||
|
| that is bundled with this package in the file LICENSE, and is |
|
||||||
|
| available through the world-wide-web at the following url: |
|
||||||
|
| https://www.php.net/license/3_01.txt |
|
||||||
|
| If you did not receive a copy of the PHP license and are unable to |
|
||||||
|
| obtain it through the world-wide-web, please send a note to |
|
||||||
|
| license@php.net so we can mail you a copy immediately. |
|
||||||
|
+----------------------------------------------------------------------+
|
||||||
|
| Author: Frank Du <frank.du@intel.com> |
|
||||||
|
+----------------------------------------------------------------------+
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _CRC32_X86_HEADER_H_
|
||||||
|
#define _CRC32_X86_HEADER_H_
|
||||||
|
|
||||||
|
#include "php.h"
|
||||||
|
|
||||||
|
typedef enum {
|
||||||
|
/* polynomial: 0x04C11DB7, used by bzip */
|
||||||
|
X86_CRC32 = 0,
|
||||||
|
/*
|
||||||
|
polynomial: 0x04C11DB7 with reversed ordering,
|
||||||
|
used by ethernet (IEEE 802.3), gzip, zip, png, etc
|
||||||
|
*/
|
||||||
|
X86_CRC32B,
|
||||||
|
/*
|
||||||
|
polynomial: 0x1EDC6F41 with reversed ordering,
|
||||||
|
used by iSCSI, SCTP, Btrfs, ext4, etc
|
||||||
|
*/
|
||||||
|
X86_CRC32C,
|
||||||
|
X86_CRC32_MAX,
|
||||||
|
} X86_CRC32_TYPE;
|
||||||
|
|
||||||
|
#if ZEND_INTRIN_SSE4_2_PCLMUL_FUNC_PTR
|
||||||
|
PHP_MINIT_FUNCTION(crc32_x86_intrin);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if ZEND_INTRIN_SSE4_2_PCLMUL_NATIVE || ZEND_INTRIN_SSE4_2_PCLMUL_RESOLVER
|
||||||
|
/* Return the size processed by SIMD routine */
|
||||||
|
size_t crc32_x86_simd_update(X86_CRC32_TYPE type, uint32_t *crc, const unsigned char *p, size_t nr);
|
||||||
|
#else
|
||||||
|
static inline size_t crc32_x86_simd_update(X86_CRC32_TYPE type, uint32_t *crc, const unsigned char *p, size_t nr)
|
||||||
|
{
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
@ -6,9 +6,37 @@ $input = array("foo", "bar", "baz", "grldsajkopallkjasd");
|
|||||||
foreach($input AS $i) {
|
foreach($input AS $i) {
|
||||||
printf("%u\n", crc32($i));
|
printf("%u\n", crc32($i));
|
||||||
}
|
}
|
||||||
|
printf("%u\n", crc32("1234567890123456"));
|
||||||
|
printf("%u\n", crc32("1234567890123456abc"));
|
||||||
|
printf("%u\n", crc32("12345678901234561234567890123456"));
|
||||||
|
printf("%u\n", crc32("12345678901234561234567890123456abc"));
|
||||||
|
printf("%u\n", crc32("123456789012345612345678901234561234567890123456"));
|
||||||
|
printf("%u\n", crc32("123456789012345612345678901234561234567890123456abc"));
|
||||||
|
printf("%u\n", crc32("1234567890123456123456789012345612345678901234561234567890123456"));
|
||||||
|
printf("%u\n", crc32("1234567890123456123456789012345612345678901234561234567890123456abc"));
|
||||||
|
printf("%u\n", crc32("12345678901234561234567890123456123456789012345612345678901234561234567890123456"));
|
||||||
|
printf("%u\n", crc32("12345678901234561234567890123456123456789012345612345678901234561234567890123456abc"));
|
||||||
|
printf("%u\n", crc32("12345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456"));
|
||||||
|
printf("%u\n", crc32("12345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456abc"));
|
||||||
|
printf("%u\n", crc32("123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456"));
|
||||||
|
printf("%u\n", crc32("123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456123456789012345612345678901234561234567890123456abc"));
|
||||||
?>
|
?>
|
||||||
--EXPECT--
|
--EXPECT--
|
||||||
2356372769
|
2356372769
|
||||||
1996459178
|
1996459178
|
||||||
2015626392
|
2015626392
|
||||||
824412087
|
824412087
|
||||||
|
509595063
|
||||||
|
1890929711
|
||||||
|
156217630
|
||||||
|
941689929
|
||||||
|
1939457775
|
||||||
|
2213121284
|
||||||
|
522627406
|
||||||
|
3806880586
|
||||||
|
105010221
|
||||||
|
1135881371
|
||||||
|
640556525
|
||||||
|
3081003972
|
||||||
|
3751918510
|
||||||
|
1076024698
|
||||||
|
Loading…
Reference in New Issue
Block a user