From 91663a92d1697fc30a7ba4687d73e0f63ec2baa1 Mon Sep 17 00:00:00 2001 From: Christian Schneider Date: Wed, 5 Apr 2017 17:30:23 +0200 Subject: [PATCH] Replace slow generic sha3 implementation by https://github.com/gvanas/KeccakCodePackage Fix hash_copy() tests by using correct size for sha3 context sync config.w32 with with new sha3 files Move dependency on KeccakHash.h to hash_sha3.c so we do not rely on it to install php_hash_sha3.h Allocate memory for KeccacInstance in hash_sha3.c so header files do not need to know about implementation details while keeping API backward compatible to original sha3 implementation Fix memory leak because hash_copy is called after init which already allocates the hashinstance --- ext/hash/config.m4 | 13 +- ext/hash/config.w32 | 21 +- ext/hash/hash_sha3.c | 193 +- ext/hash/php_hash_sha3.h | 3 +- ext/hash/sha3/generic32lc/KeccakHash.c | 80 + ext/hash/sha3/generic32lc/KeccakHash.h | 113 + ext/hash/sha3/generic32lc/KeccakP-1600-SnP.h | 38 + .../generic32lc/KeccakP-1600-inplace32BI.c | 1158 +++++++++ ext/hash/sha3/generic32lc/KeccakSponge.c | 110 + ext/hash/sha3/generic32lc/KeccakSponge.h | 178 ++ ext/hash/sha3/generic32lc/KeccakSponge.inc | 313 +++ ext/hash/sha3/generic32lc/SnP-Relaned.h | 134 + ext/hash/sha3/generic32lc/align.h | 34 + ext/hash/sha3/generic32lc/brg_endian.h | 142 ++ ext/hash/sha3/generic64lc/KeccakHash.c | 80 + ext/hash/sha3/generic64lc/KeccakHash.h | 113 + .../sha3/generic64lc/KeccakP-1600-64.macros | 2197 +++++++++++++++++ ext/hash/sha3/generic64lc/KeccakP-1600-SnP.h | 50 + .../generic64lc/KeccakP-1600-opt64-config.h | 3 + .../sha3/generic64lc/KeccakP-1600-opt64.c | 484 ++++ .../generic64lc/KeccakP-1600-unrolling.macros | 198 ++ ext/hash/sha3/generic64lc/KeccakSponge.c | 110 + ext/hash/sha3/generic64lc/KeccakSponge.h | 178 ++ ext/hash/sha3/generic64lc/KeccakSponge.inc | 313 +++ ext/hash/sha3/generic64lc/SnP-Relaned.h | 134 + ext/hash/sha3/generic64lc/align.h | 34 + ext/hash/sha3/generic64lc/brg_endian.h | 142 ++ 27 files changed, 6379 insertions(+), 187 deletions(-) create mode 100644 ext/hash/sha3/generic32lc/KeccakHash.c create mode 100644 ext/hash/sha3/generic32lc/KeccakHash.h create mode 100644 ext/hash/sha3/generic32lc/KeccakP-1600-SnP.h create mode 100644 ext/hash/sha3/generic32lc/KeccakP-1600-inplace32BI.c create mode 100644 ext/hash/sha3/generic32lc/KeccakSponge.c create mode 100644 ext/hash/sha3/generic32lc/KeccakSponge.h create mode 100644 ext/hash/sha3/generic32lc/KeccakSponge.inc create mode 100644 ext/hash/sha3/generic32lc/SnP-Relaned.h create mode 100644 ext/hash/sha3/generic32lc/align.h create mode 100644 ext/hash/sha3/generic32lc/brg_endian.h create mode 100644 ext/hash/sha3/generic64lc/KeccakHash.c create mode 100644 ext/hash/sha3/generic64lc/KeccakHash.h create mode 100644 ext/hash/sha3/generic64lc/KeccakP-1600-64.macros create mode 100644 ext/hash/sha3/generic64lc/KeccakP-1600-SnP.h create mode 100644 ext/hash/sha3/generic64lc/KeccakP-1600-opt64-config.h create mode 100644 ext/hash/sha3/generic64lc/KeccakP-1600-opt64.c create mode 100644 ext/hash/sha3/generic64lc/KeccakP-1600-unrolling.macros create mode 100644 ext/hash/sha3/generic64lc/KeccakSponge.c create mode 100644 ext/hash/sha3/generic64lc/KeccakSponge.h create mode 100644 ext/hash/sha3/generic64lc/KeccakSponge.inc create mode 100644 ext/hash/sha3/generic64lc/SnP-Relaned.h create mode 100644 ext/hash/sha3/generic64lc/align.h create mode 100644 ext/hash/sha3/generic64lc/brg_endian.h diff --git a/ext/hash/config.m4 b/ext/hash/config.m4 index 703cf14a302..5629453d861 100644 --- a/ext/hash/config.m4 +++ b/ext/hash/config.m4 @@ -25,15 +25,24 @@ if test "$PHP_HASH" != "no"; then AC_CHECK_SIZEOF(long, 4) AC_CHECK_SIZEOF(long long, 8) + PHP_CHECK_64BIT([ + SHA3_DIR="sha3/generic32lc" + SHA3_OPT_SRC="$SHA3_DIR/KeccakP-1600-inplace32BI.c" + ],[ + SHA3_DIR="sha3/generic64lc" + SHA3_OPT_SRC="$SHA3_DIR/KeccakP-1600-opt64.c" + ]) + EXT_HASH_SHA3_SOURCES="$SHA3_OPT_SRC $SHA3_DIR/KeccakHash.c $SHA3_DIR/KeccakSponge.c" + PHP_HASH_CFLAGS="-I@ext_srcdir@/$SHA3_DIR -DKeccakP200_excluded -DKeccakP400_excluded -DKeccakP800_excluded" EXT_HASH_SOURCES="hash.c hash_md.c hash_sha.c hash_ripemd.c hash_haval.c \ hash_tiger.c hash_gost.c hash_snefru.c hash_whirlpool.c hash_adler32.c \ - hash_crc32.c hash_fnv.c hash_joaat.c hash_sha3.c" + hash_crc32.c hash_fnv.c hash_joaat.c hash_sha3.c $EXT_HASH_SHA3_SOURCES" EXT_HASH_HEADERS="php_hash.h php_hash_md.h php_hash_sha.h php_hash_ripemd.h \ php_hash_haval.h php_hash_tiger.h php_hash_gost.h php_hash_snefru.h \ php_hash_whirlpool.h php_hash_adler32.h php_hash_crc32.h \ php_hash_fnv.h php_hash_joaat.h php_hash_sha3.h" - PHP_NEW_EXTENSION(hash, $EXT_HASH_SOURCES, $ext_shared) + PHP_NEW_EXTENSION(hash, $EXT_HASH_SOURCES, $ext_shared,,$PHP_HASH_CFLAGS) ifdef([PHP_INSTALL_HEADERS], [ PHP_INSTALL_HEADERS(ext/hash, $EXT_HASH_HEADERS) ]) diff --git a/ext/hash/config.w32 b/ext/hash/config.w32 index 17711facd8f..1112142bbba 100644 --- a/ext/hash/config.w32 +++ b/ext/hash/config.w32 @@ -12,13 +12,24 @@ if (PHP_MHASH != "no") { } if (PHP_HASH != "no") { - AC_DEFINE('HAVE_HASH_EXT', 1); - EXTENSION("hash", "hash.c hash_md.c hash_sha.c hash_ripemd.c hash_haval.c " - + "hash_tiger.c hash_gost.c hash_snefru.c hash_whirlpool.c " - + "hash_adler32.c hash_crc32.c hash_joaat.c hash_fnv.c hash_sha3.c"); + var sha3_arch_dir = "sha3/" + (X64 ? "generic64lc" : "generic32lc"); + var sha3_dir = "ext/hash/" + sha3_arch_dir; + if (CHECK_HEADER_ADD_INCLUDE("KeccakHash.h", "CFLAGS_HASH", PHP_HASH + ";" + sha3_dir)) { + AC_DEFINE('HAVE_HASH_EXT', 1); + EXTENSION("hash", "hash.c hash_md.c hash_sha.c hash_ripemd.c hash_haval.c " + + "hash_tiger.c hash_gost.c hash_snefru.c hash_whirlpool.c " + + "hash_adler32.c hash_crc32.c hash_joaat.c hash_fnv.c hash_sha3.c"); + + ADD_SOURCES(sha3_dir, "KeccakHash.c KeccakSponge.c " + (X64 ? "KeccakP-1600-opt64.c" : "KeccakP-1600-inplace32BI.c"), + "hash"); + ADD_FLAG("CFLAGS_HASH", "/DKeccakP200_excluded /DKeccakP400_excluded /DKeccakP800_excluded"); + PHP_INSTALL_HEADERS("ext/hash/", "php_hash.h php_hash_md.h php_hash_sha.h php_hash_ripemd.h " + "php_hash_haval.h php_hash_tiger.h php_hash_gost.h php_hash_snefru.h " + - "php_hash_whirlpool.h php_hash_adler32.h php_hash_crc32.h php_hash_sha3.h"); + "php_hash_whirlpool.h php_hash_adler32.h php_hash_crc32.h php_hash_sha3.h "); + } else { + WARNING("gd not enabled; libraries and headers not found"); + } } diff --git a/ext/hash/hash_sha3.c b/ext/hash/hash_sha3.c index a3bfda48998..ee9d010da4b 100644 --- a/ext/hash/hash_sha3.c +++ b/ext/hash/hash_sha3.c @@ -19,204 +19,41 @@ #include "php_hash.h" #include "php_hash_sha3.h" -#if (defined(__APPLE__) || defined(__APPLE_CC__)) && \ - (defined(__BIG_ENDIAN__) || defined(__LITTLE_ENDIAN__)) -# if defined(__LITTLE_ENDIAN__) -# undef WORDS_BIGENDIAN -# else -# if defined(__BIG_ENDIAN__) -# define WORDS_BIGENDIAN -# endif -# endif -#endif +#define SUCCESS SHA3_SUCCESS /* Avoid conflict between KeccacHash.h and zend_types.h */ +#include "KeccakHash.h" -static inline uint64_t rol64(uint64_t v, unsigned char b) { - return (v << b) | (v >> (64 - b)); -} -static inline unsigned char idx(unsigned char x, unsigned char y) { - return x + (5 * y); -} -#ifdef WORDS_BIGENDIAN -static inline uint64_t load64(const unsigned char* x) { - char i; - uint64_t ret = 0; - for (i = 7; i >= 0; --i) { - ret <<= 8; - ret |= x[i]; - } - return ret; -} -static inline void store64(unsigned char* x, uint64_t val) { - char i; - for (i = 0; i < 8; ++i) { - x[i] = val & 0xFF; - val >>= 8; - } -} -static inline void xor64(unsigned char* x, uint64_t val) { - char i; - for (i = 0; i < 8; ++i) { - x[i] ^= val & 0xFF; - val >>= 8; - } -} -# define readLane(x, y) load64(ctx->state+sizeof(uint64_t)*idx(x, y)) -# define writeLane(x, y, v) store64(ctx->state+sizeof(uint64_t)*idx(x, y), v) -# define XORLane(x, y, v) xor64(ctx->state+sizeof(uint64_t)*idx(x, y), v) -#else -# define readLane(x, y) (((uint64_t*)ctx->state)[idx(x,y)]) -# define writeLane(x, y, v) (((uint64_t*)ctx->state)[idx(x,y)] = v) -# define XORLane(x, y, v) (((uint64_t*)ctx->state)[idx(x,y)] ^= v) -#endif +// ========================================================================== -static inline char LFSR86540(unsigned char* pLFSR) +static int hash_sha3_copy(const void *ops, void *orig_context, void *dest_context) { - unsigned char LFSR = *pLFSR; - char result = LFSR & 0x01; - if (LFSR & 0x80) { - // Primitive polynomial over GF(2): x^8+x^6+x^5+x^4+1 - LFSR = (LFSR << 1) ^ 0x71; - } else { - LFSR <<= 1; - } - *pLFSR = LFSR; - return result; + PHP_SHA3_CTX* orig = (PHP_SHA3_CTX*)orig_context; + PHP_SHA3_CTX* dest = (PHP_SHA3_CTX*)dest_context; + memcpy(dest->hashinstance, orig->hashinstance, sizeof(Keccak_HashInstance)); + return SUCCESS; } -static void permute(PHP_SHA3_CTX* ctx) { - unsigned char LFSRstate = 0x01; - unsigned char round; - - for (round = 0; round < 24; ++round) { - { // Theta step (see [Keccak Reference, Section 2.3.2]) - uint64_t C[5], D; - unsigned char x, y; - for (x = 0; x < 5; ++x) { - C[x] = readLane(x, 0) ^ readLane(x, 1) ^ - readLane(x, 2) ^ readLane(x, 3) ^ readLane(x, 4); - } - for (x = 0; x < 5; ++x) { - D = C[(x+4)%5] ^ rol64(C[(x+1)%5], 1); - for (y = 0; y < 5; ++y) { - XORLane(x, y, D); - } - } - } - - { // p and Pi steps (see [Keccak Reference, Sections 2.3.3 and 2.3.4]) - unsigned char x = 1, y = 0, t; - uint64_t current = readLane(x, y); - for (t = 0; t < 24; ++t) { - unsigned char r = ((t + 1) * (t + 2) / 2) % 64; - unsigned char Y = (2*x + 3*y) % 5; - uint64_t temp; - x = y; - y = Y; - temp = readLane(x, y); - writeLane(x, y, rol64(current, r)); - current = temp; - } - } - - { // X step (see [Keccak Reference, Section 2.3.1]) - unsigned char x, y; - for (y = 0; y < 5; ++y) { - uint64_t temp[5]; - for (x = 0; x < 5; ++x) { - temp[x] = readLane(x, y); - } - for (x = 0; x < 5; ++x) { - writeLane(x, y, temp[x] ^((~temp[(x+1)%5]) & temp[(x+2)%5])); - } - } - } - - { // i step (see [Keccak Reference, Section 2.3.5]) - unsigned char j; - for (j = 0; j < 7; ++j) { - if (LFSR86540(&LFSRstate)) { - uint64_t bitPos = (1< 0) { - unsigned int len = block_size - ctx->pos; - if (len > count) len = count; - count -= len; - while (len-- > 0) { - ctx->state[ctx->pos++] ^= *(buf++); - } - if (ctx->pos >= block_size) { - permute(ctx); - ctx->pos = 0; - } - } -} - -static void PHP_SHA3_Final(unsigned char* digest, - PHP_SHA3_CTX* ctx, - int block_size, - int digest_size) { - int len = digest_size; - - // Pad state to finalize - ctx->state[ctx->pos++] ^= 0x06; - ctx->state[block_size-1] ^= 0x80; - permute(ctx); - - // Square output for digest - for(;;) { - int bs = (len < block_size) ? len : block_size; - memcpy(digest, ctx->state, bs); - digest += bs; - len -= bs; - if (!len) break; - permute(ctx); - } - - // Zero out context - memset(ctx, 0, sizeof(PHP_SHA3_CTX)); -} - -// ========================================================================== - #define DECLARE_SHA3_OPS(bits) \ void PHP_SHA3##bits##Init(PHP_SHA3_##bits##_CTX* ctx) { \ - PHP_SHA3_Init(ctx, bits); \ + ctx->hashinstance = emalloc(sizeof(Keccak_HashInstance)); \ + Keccak_HashInitialize_SHA3_##bits((Keccak_HashInstance *)ctx->hashinstance); \ } \ void PHP_SHA3##bits##Update(PHP_SHA3_##bits##_CTX* ctx, \ const unsigned char* input, \ unsigned int inputLen) { \ - PHP_SHA3_Update(ctx, input, inputLen, \ - (1600 - (2 * bits)) >> 3); \ + Keccak_HashUpdate((Keccak_HashInstance *)ctx->hashinstance, input, inputLen * 8); \ } \ void PHP_SHA3##bits##Final(unsigned char* digest, \ PHP_SHA3_##bits##_CTX* ctx) { \ - PHP_SHA3_Final(digest, ctx, \ - (1600 - (2 * bits)) >> 3, \ - bits >> 3); \ + Keccak_HashFinal((Keccak_HashInstance *)ctx->hashinstance, digest); \ + efree(ctx->hashinstance); \ + ctx->hashinstance = NULL; \ } \ const php_hash_ops php_hash_sha3_##bits##_ops = { \ (php_hash_init_func_t) PHP_SHA3##bits##Init, \ (php_hash_update_func_t) PHP_SHA3##bits##Update, \ (php_hash_final_func_t) PHP_SHA3##bits##Final, \ - php_hash_copy, \ + hash_sha3_copy, \ bits >> 3, \ (1600 - (2 * bits)) >> 3, \ sizeof(PHP_SHA3_##bits##_CTX), \ diff --git a/ext/hash/php_hash_sha3.h b/ext/hash/php_hash_sha3.h index 8b70ef4a7e0..b47d1b102f3 100644 --- a/ext/hash/php_hash_sha3.h +++ b/ext/hash/php_hash_sha3.h @@ -22,8 +22,7 @@ #include "php.h" typedef struct { - unsigned char state[200]; // 5 * 5 * sizeof(uint64) - uint32_t pos; + void *hashinstance; } PHP_SHA3_CTX; typedef PHP_SHA3_CTX PHP_SHA3_224_CTX; diff --git a/ext/hash/sha3/generic32lc/KeccakHash.c b/ext/hash/sha3/generic32lc/KeccakHash.c new file mode 100644 index 00000000000..259831baddc --- /dev/null +++ b/ext/hash/sha3/generic32lc/KeccakHash.c @@ -0,0 +1,80 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include +#include "KeccakHash.h" + +/* ---------------------------------------------------------------- */ + +HashReturn Keccak_HashInitialize(Keccak_HashInstance *instance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix) +{ + HashReturn result; + + if (delimitedSuffix == 0) + return FAIL; + result = (HashReturn)KeccakWidth1600_SpongeInitialize(&instance->sponge, rate, capacity); + if (result != SUCCESS) + return result; + instance->fixedOutputLength = hashbitlen; + instance->delimitedSuffix = delimitedSuffix; + return SUCCESS; +} + +/* ---------------------------------------------------------------- */ + +HashReturn Keccak_HashUpdate(Keccak_HashInstance *instance, const BitSequence *data, DataLength databitlen) +{ + if ((databitlen % 8) == 0) + return (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, data, databitlen/8); + else { + HashReturn ret = (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, data, databitlen/8); + if (ret == SUCCESS) { + /* The last partial byte is assumed to be aligned on the least significant bits */ + unsigned char lastByte = data[databitlen/8]; + /* Concatenate the last few bits provided here with those of the suffix */ + unsigned short delimitedLastBytes = (unsigned short)((unsigned short)lastByte | ((unsigned short)instance->delimitedSuffix << (databitlen % 8))); + if ((delimitedLastBytes & 0xFF00) == 0x0000) { + instance->delimitedSuffix = delimitedLastBytes & 0xFF; + } + else { + unsigned char oneByte[1]; + oneByte[0] = delimitedLastBytes & 0xFF; + ret = (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, oneByte, 1); + instance->delimitedSuffix = (delimitedLastBytes >> 8) & 0xFF; + } + } + return ret; + } +} + +/* ---------------------------------------------------------------- */ + +HashReturn Keccak_HashFinal(Keccak_HashInstance *instance, BitSequence *hashval) +{ + HashReturn ret = (HashReturn)KeccakWidth1600_SpongeAbsorbLastFewBits(&instance->sponge, instance->delimitedSuffix); + if (ret == SUCCESS) + return (HashReturn)KeccakWidth1600_SpongeSqueeze(&instance->sponge, hashval, instance->fixedOutputLength/8); + else + return ret; +} + +/* ---------------------------------------------------------------- */ + +HashReturn Keccak_HashSqueeze(Keccak_HashInstance *instance, BitSequence *data, DataLength databitlen) +{ + if ((databitlen % 8) != 0) + return FAIL; + return (HashReturn)KeccakWidth1600_SpongeSqueeze(&instance->sponge, data, databitlen/8); +} diff --git a/ext/hash/sha3/generic32lc/KeccakHash.h b/ext/hash/sha3/generic32lc/KeccakHash.h new file mode 100644 index 00000000000..ec35d3dab25 --- /dev/null +++ b/ext/hash/sha3/generic32lc/KeccakHash.h @@ -0,0 +1,113 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _KeccakHashInterface_h_ +#define _KeccakHashInterface_h_ + +#ifndef KeccakP1600_excluded + +#include "KeccakSponge.h" +#include + +typedef unsigned char BitSequence; +typedef size_t DataLength; +typedef enum { SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2 } HashReturn; + +typedef struct { + KeccakWidth1600_SpongeInstance sponge; + unsigned int fixedOutputLength; + unsigned char delimitedSuffix; +} Keccak_HashInstance; + +/** + * Function to initialize the Keccak[r, c] sponge function instance used in sequential hashing mode. + * @param hashInstance Pointer to the hash instance to be initialized. + * @param rate The value of the rate r. + * @param capacity The value of the capacity c. + * @param hashbitlen The desired number of output bits, + * or 0 for an arbitrarily-long output. + * @param delimitedSuffix Bits that will be automatically appended to the end + * of the input message, as in domain separation. + * This is a byte containing from 0 to 7 bits + * formatted like the @a delimitedData parameter of + * the Keccak_SpongeAbsorbLastFewBits() function. + * @pre One must have r+c=1600 and the rate a multiple of 8 bits in this implementation. + * @return SUCCESS if successful, FAIL otherwise. + */ +HashReturn Keccak_HashInitialize(Keccak_HashInstance *hashInstance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix); + +/** Macro to initialize a SHAKE128 instance as specified in the FIPS 202 standard. + */ +#define Keccak_HashInitialize_SHAKE128(hashInstance) Keccak_HashInitialize(hashInstance, 1344, 256, 0, 0x1F) + +/** Macro to initialize a SHAKE256 instance as specified in the FIPS 202 standard. + */ +#define Keccak_HashInitialize_SHAKE256(hashInstance) Keccak_HashInitialize(hashInstance, 1088, 512, 0, 0x1F) + +/** Macro to initialize a SHA3-224 instance as specified in the FIPS 202 standard. + */ +#define Keccak_HashInitialize_SHA3_224(hashInstance) Keccak_HashInitialize(hashInstance, 1152, 448, 224, 0x06) + +/** Macro to initialize a SHA3-256 instance as specified in the FIPS 202 standard. + */ +#define Keccak_HashInitialize_SHA3_256(hashInstance) Keccak_HashInitialize(hashInstance, 1088, 512, 256, 0x06) + +/** Macro to initialize a SHA3-384 instance as specified in the FIPS 202 standard. + */ +#define Keccak_HashInitialize_SHA3_384(hashInstance) Keccak_HashInitialize(hashInstance, 832, 768, 384, 0x06) + +/** Macro to initialize a SHA3-512 instance as specified in the FIPS 202 standard. + */ +#define Keccak_HashInitialize_SHA3_512(hashInstance) Keccak_HashInitialize(hashInstance, 576, 1024, 512, 0x06) + +/** + * Function to give input data to be absorbed. + * @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize(). + * @param data Pointer to the input data. + * When @a databitLen is not a multiple of 8, the last bits of data must be + * in the least significant bits of the last byte (little-endian convention). + * @param databitLen The number of input bits provided in the input data. + * @pre In the previous call to Keccak_HashUpdate(), databitlen was a multiple of 8. + * @return SUCCESS if successful, FAIL otherwise. + */ +HashReturn Keccak_HashUpdate(Keccak_HashInstance *hashInstance, const BitSequence *data, DataLength databitlen); + +/** + * Function to call after all input blocks have been input and to get + * output bits if the length was specified when calling Keccak_HashInitialize(). + * @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize(). + * If @a hashbitlen was not 0 in the call to Keccak_HashInitialize(), the number of + * output bits is equal to @a hashbitlen. + * If @a hashbitlen was 0 in the call to Keccak_HashInitialize(), the output bits + * must be extracted using the Keccak_HashSqueeze() function. + * @param hashval Pointer to the buffer where to store the output data. + * @return SUCCESS if successful, FAIL otherwise. + */ +HashReturn Keccak_HashFinal(Keccak_HashInstance *hashInstance, BitSequence *hashval); + + /** + * Function to squeeze output data. + * @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize(). + * @param data Pointer to the buffer where to store the output data. + * @param databitlen The number of output bits desired (must be a multiple of 8). + * @pre Keccak_HashFinal() must have been already called. + * @pre @a databitlen is a multiple of 8. + * @return SUCCESS if successful, FAIL otherwise. + */ +HashReturn Keccak_HashSqueeze(Keccak_HashInstance *hashInstance, BitSequence *data, DataLength databitlen); + +#endif + +#endif diff --git a/ext/hash/sha3/generic32lc/KeccakP-1600-SnP.h b/ext/hash/sha3/generic32lc/KeccakP-1600-SnP.h new file mode 100644 index 00000000000..258f411f77d --- /dev/null +++ b/ext/hash/sha3/generic32lc/KeccakP-1600-SnP.h @@ -0,0 +1,38 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _KeccakP_1600_SnP_h_ +#define _KeccakP_1600_SnP_h_ + +/** For the documentation, see SnP-documentation.h. + */ + +#define KeccakP1600_implementation "in-place 32-bit optimized implementation" +#define KeccakP1600_stateSizeInBytes 200 +#define KeccakP1600_stateAlignment 8 + +#define KeccakP1600_StaticInitialize() +void KeccakP1600_Initialize(void *state); +void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset); +void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length); +void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length); +void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount); +void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds); +void KeccakP1600_Permute_12rounds(void *state); +void KeccakP1600_Permute_24rounds(void *state); +void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length); +void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length); + +#endif diff --git a/ext/hash/sha3/generic32lc/KeccakP-1600-inplace32BI.c b/ext/hash/sha3/generic32lc/KeccakP-1600-inplace32BI.c new file mode 100644 index 00000000000..3595e5f47fd --- /dev/null +++ b/ext/hash/sha3/generic32lc/KeccakP-1600-inplace32BI.c @@ -0,0 +1,1158 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include +#include "brg_endian.h" +#include "KeccakP-1600-SnP.h" +#include "SnP-Relaned.h" + +typedef unsigned char UINT8; +typedef unsigned int UINT32; +/* WARNING: on 8-bit and 16-bit platforms, this should be replaced by: */ +/* typedef unsigned long UINT32; */ + +#define ROL32(a, offset) ((((UINT32)a) << (offset)) ^ (((UINT32)a) >> (32-(offset)))) + +/* Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +#define prepareToBitInterleaving(low, high, temp, temp0, temp1) \ + temp0 = (low); \ + temp = (temp0 ^ (temp0 >> 1)) & 0x22222222UL; temp0 = temp0 ^ temp ^ (temp << 1); \ + temp = (temp0 ^ (temp0 >> 2)) & 0x0C0C0C0CUL; temp0 = temp0 ^ temp ^ (temp << 2); \ + temp = (temp0 ^ (temp0 >> 4)) & 0x00F000F0UL; temp0 = temp0 ^ temp ^ (temp << 4); \ + temp = (temp0 ^ (temp0 >> 8)) & 0x0000FF00UL; temp0 = temp0 ^ temp ^ (temp << 8); \ + temp1 = (high); \ + temp = (temp1 ^ (temp1 >> 1)) & 0x22222222UL; temp1 = temp1 ^ temp ^ (temp << 1); \ + temp = (temp1 ^ (temp1 >> 2)) & 0x0C0C0C0CUL; temp1 = temp1 ^ temp ^ (temp << 2); \ + temp = (temp1 ^ (temp1 >> 4)) & 0x00F000F0UL; temp1 = temp1 ^ temp ^ (temp << 4); \ + temp = (temp1 ^ (temp1 >> 8)) & 0x0000FF00UL; temp1 = temp1 ^ temp ^ (temp << 8); + +#define toBitInterleavingAndXOR(low, high, even, odd, temp, temp0, temp1) \ + prepareToBitInterleaving(low, high, temp, temp0, temp1) \ + even ^= (temp0 & 0x0000FFFF) | (temp1 << 16); \ + odd ^= (temp0 >> 16) | (temp1 & 0xFFFF0000); + +#define toBitInterleavingAndAND(low, high, even, odd, temp, temp0, temp1) \ + prepareToBitInterleaving(low, high, temp, temp0, temp1) \ + even &= (temp0 & 0x0000FFFF) | (temp1 << 16); \ + odd &= (temp0 >> 16) | (temp1 & 0xFFFF0000); + +#define toBitInterleavingAndSet(low, high, even, odd, temp, temp0, temp1) \ + prepareToBitInterleaving(low, high, temp, temp0, temp1) \ + even = (temp0 & 0x0000FFFF) | (temp1 << 16); \ + odd = (temp0 >> 16) | (temp1 & 0xFFFF0000); + +/* Credit to Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 */ +#define prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \ + temp0 = (even); \ + temp1 = (odd); \ + temp = (temp0 & 0x0000FFFF) | (temp1 << 16); \ + temp1 = (temp0 >> 16) | (temp1 & 0xFFFF0000); \ + temp0 = temp; \ + temp = (temp0 ^ (temp0 >> 8)) & 0x0000FF00UL; temp0 = temp0 ^ temp ^ (temp << 8); \ + temp = (temp0 ^ (temp0 >> 4)) & 0x00F000F0UL; temp0 = temp0 ^ temp ^ (temp << 4); \ + temp = (temp0 ^ (temp0 >> 2)) & 0x0C0C0C0CUL; temp0 = temp0 ^ temp ^ (temp << 2); \ + temp = (temp0 ^ (temp0 >> 1)) & 0x22222222UL; temp0 = temp0 ^ temp ^ (temp << 1); \ + temp = (temp1 ^ (temp1 >> 8)) & 0x0000FF00UL; temp1 = temp1 ^ temp ^ (temp << 8); \ + temp = (temp1 ^ (temp1 >> 4)) & 0x00F000F0UL; temp1 = temp1 ^ temp ^ (temp << 4); \ + temp = (temp1 ^ (temp1 >> 2)) & 0x0C0C0C0CUL; temp1 = temp1 ^ temp ^ (temp << 2); \ + temp = (temp1 ^ (temp1 >> 1)) & 0x22222222UL; temp1 = temp1 ^ temp ^ (temp << 1); + +#define fromBitInterleaving(even, odd, low, high, temp, temp0, temp1) \ + prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \ + low = temp0; \ + high = temp1; + +#define fromBitInterleavingAndXOR(even, odd, lowIn, highIn, lowOut, highOut, temp, temp0, temp1) \ + prepareFromBitInterleaving(even, odd, temp, temp0, temp1) \ + lowOut = lowIn ^ temp0; \ + highOut = highIn ^ temp1; + +void KeccakP1600_SetBytesInLaneToZero(void *state, unsigned int lanePosition, unsigned int offset, unsigned int length) +{ + UINT8 laneAsBytes[8]; + UINT32 low, high; + UINT32 temp, temp0, temp1; + UINT32 *stateAsHalfLanes = (UINT32*)state; + + memset(laneAsBytes, 0xFF, offset); + memset(laneAsBytes+offset, 0x00, length); + memset(laneAsBytes+offset+length, 0xFF, 8-offset-length); +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + low = *((UINT32*)(laneAsBytes+0)); + high = *((UINT32*)(laneAsBytes+4)); +#else + low = laneAsBytes[0] + | ((UINT32)(laneAsBytes[1]) << 8) + | ((UINT32)(laneAsBytes[2]) << 16) + | ((UINT32)(laneAsBytes[3]) << 24); + high = laneAsBytes[4] + | ((UINT32)(laneAsBytes[5]) << 8) + | ((UINT32)(laneAsBytes[6]) << 16) + | ((UINT32)(laneAsBytes[7]) << 24); +#endif + toBitInterleavingAndAND(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_Initialize(void *state) +{ + memset(state, 0, 200); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_AddByte(void *state, unsigned char byte, unsigned int offset) +{ + unsigned int lanePosition = offset/8; + unsigned int offsetInLane = offset%8; + UINT32 low, high; + UINT32 temp, temp0, temp1; + UINT32 *stateAsHalfLanes = (UINT32*)state; + + if (offsetInLane < 4) { + low = (UINT32)byte << (offsetInLane*8); + high = 0; + } + else { + low = 0; + high = (UINT32)byte << ((offsetInLane-4)*8); + } + toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length) +{ + UINT8 laneAsBytes[8]; + UINT32 low, high; + UINT32 temp, temp0, temp1; + UINT32 *stateAsHalfLanes = (UINT32*)state; + + memset(laneAsBytes, 0, 8); + memcpy(laneAsBytes+offset, data, length); +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + low = *((UINT32*)(laneAsBytes+0)); + high = *((UINT32*)(laneAsBytes+4)); +#else + low = laneAsBytes[0] + | ((UINT32)(laneAsBytes[1]) << 8) + | ((UINT32)(laneAsBytes[2]) << 16) + | ((UINT32)(laneAsBytes[3]) << 24); + high = laneAsBytes[4] + | ((UINT32)(laneAsBytes[5]) << 8) + | ((UINT32)(laneAsBytes[6]) << 16) + | ((UINT32)(laneAsBytes[7]) << 24); +#endif + toBitInterleavingAndXOR(low, high, stateAsHalfLanes[lanePosition*2+0], stateAsHalfLanes[lanePosition*2+1], temp, temp0, temp1); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_AddLanes(void *state, const unsigned char *data, unsigned int laneCount) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + const UINT32 * pI = (const UINT32 *)data; + UINT32 * pS = (UINT32*)state; + UINT32 t, x0, x1; + int i; + for (i = laneCount-1; i >= 0; --i) { +#ifdef NO_MISALIGNED_ACCESSES + UINT32 low; + UINT32 high; + memcpy(&low, pI++, 4); + memcpy(&high, pI++, 4); + toBitInterleavingAndXOR(low, high, *(pS++), *(pS++), t, x0, x1); +#else + toBitInterleavingAndXOR(*(pI++), *(pI++), *(pS++), *(pS++), t, x0, x1) +#endif + } +#else + unsigned int lanePosition; + for(lanePosition=0; lanePosition= 0; --i) { +#ifdef NO_MISALIGNED_ACCESSES + UINT32 low; + UINT32 high; + memcpy(&low, pI++, 4); + memcpy(&high, pI++, 4); + toBitInterleavingAndSet(low, high, *(pS++), *(pS++), t, x0, x1); +#else + toBitInterleavingAndSet(*(pI++), *(pI++), *(pS++), *(pS++), t, x0, x1) +#endif + } +#else + unsigned int lanePosition; + for(lanePosition=0; lanePosition> 8) & 0xFF; + laneAsBytes[2] = (low >> 16) & 0xFF; + laneAsBytes[3] = (low >> 24) & 0xFF; + laneAsBytes[4] = high & 0xFF; + laneAsBytes[5] = (high >> 8) & 0xFF; + laneAsBytes[6] = (high >> 16) & 0xFF; + laneAsBytes[7] = (high >> 24) & 0xFF; +#endif + memcpy(data, laneAsBytes+offset, length); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + UINT32 * pI = (UINT32 *)data; + const UINT32 * pS = ( const UINT32 *)state; + UINT32 t, x0, x1; + int i; + for (i = laneCount-1; i >= 0; --i) { +#ifdef NO_MISALIGNED_ACCESSES + UINT32 low; + UINT32 high; + fromBitInterleaving(*(pS++), *(pS++), low, high, t, x0, x1); + memcpy(pI++, &low, 4); + memcpy(pI++, &high, 4); +#else + fromBitInterleaving(*(pS++), *(pS++), *(pI++), *(pI++), t, x0, x1) +#endif + } +#else + unsigned int lanePosition; + for(lanePosition=0; lanePosition> 8) & 0xFF; + laneAsBytes[2] = (low >> 16) & 0xFF; + laneAsBytes[3] = (low >> 24) & 0xFF; + laneAsBytes[4] = high & 0xFF; + laneAsBytes[5] = (high >> 8) & 0xFF; + laneAsBytes[6] = (high >> 16) & 0xFF; + laneAsBytes[7] = (high >> 24) & 0xFF; + memcpy(data+lanePosition*8, laneAsBytes, 8); + } +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length) +{ + SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +{ + UINT32 *stateAsHalfLanes = (UINT32*)state; + UINT32 low, high, temp, temp0, temp1; + UINT8 laneAsBytes[8]; + unsigned int i; + + fromBitInterleaving(stateAsHalfLanes[lanePosition*2], stateAsHalfLanes[lanePosition*2+1], low, high, temp, temp0, temp1); +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + *((UINT32*)(laneAsBytes+0)) = low; + *((UINT32*)(laneAsBytes+4)) = high; +#else + laneAsBytes[0] = low & 0xFF; + laneAsBytes[1] = (low >> 8) & 0xFF; + laneAsBytes[2] = (low >> 16) & 0xFF; + laneAsBytes[3] = (low >> 24) & 0xFF; + laneAsBytes[4] = high & 0xFF; + laneAsBytes[5] = (high >> 8) & 0xFF; + laneAsBytes[6] = (high >> 16) & 0xFF; + laneAsBytes[7] = (high >> 24) & 0xFF; +#endif + for(i=0; i= 0; --i) { +#ifdef NO_MISALIGNED_ACCESSES + UINT32 low; + UINT32 high; + fromBitInterleaving(*(pS++), *(pS++), low, high, t, x0, x1); + *(pO++) = *(pI++) ^ low; + *(pO++) = *(pI++) ^ high; +#else + fromBitInterleavingAndXOR(*(pS++), *(pS++), *(pI++), *(pI++), *(pO++), *(pO++), t, x0, x1) +#endif + } +#else + unsigned int lanePosition; + for(lanePosition=0; lanePosition> 8) & 0xFF; + laneAsBytes[2] = (low >> 16) & 0xFF; + laneAsBytes[3] = (low >> 24) & 0xFF; + laneAsBytes[4] = high & 0xFF; + laneAsBytes[5] = (high >> 8) & 0xFF; + laneAsBytes[6] = (high >> 16) & 0xFF; + laneAsBytes[7] = (high >> 24) & 0xFF; + ((UINT32*)(output+lanePosition*8))[0] = ((UINT32*)(input+lanePosition*8))[0] ^ (*(const UINT32*)(laneAsBytes+0)); + ((UINT32*)(output+lanePosition*8))[1] = ((UINT32*)(input+lanePosition*8))[0] ^ (*(const UINT32*)(laneAsBytes+4)); + } +#endif +} +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +{ + SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +static const UINT32 KeccakF1600RoundConstants_int2[2*24+1] = +{ + 0x00000001UL, 0x00000000UL, + 0x00000000UL, 0x00000089UL, + 0x00000000UL, 0x8000008bUL, + 0x00000000UL, 0x80008080UL, + 0x00000001UL, 0x0000008bUL, + 0x00000001UL, 0x00008000UL, + 0x00000001UL, 0x80008088UL, + 0x00000001UL, 0x80000082UL, + 0x00000000UL, 0x0000000bUL, + 0x00000000UL, 0x0000000aUL, + 0x00000001UL, 0x00008082UL, + 0x00000000UL, 0x00008003UL, + 0x00000001UL, 0x0000808bUL, + 0x00000001UL, 0x8000000bUL, + 0x00000001UL, 0x8000008aUL, + 0x00000001UL, 0x80000081UL, + 0x00000000UL, 0x80000081UL, + 0x00000000UL, 0x80000008UL, + 0x00000000UL, 0x00000083UL, + 0x00000000UL, 0x80008003UL, + 0x00000001UL, 0x80008088UL, + 0x00000000UL, 0x80000088UL, + 0x00000001UL, 0x00008000UL, + 0x00000000UL, 0x80008082UL, + 0x000000FFUL +}; + +#define KeccakRound0() \ + Cx = Abu0^Agu0^Aku0^Amu0^Asu0; \ + Du1 = Abe1^Age1^Ake1^Ame1^Ase1; \ + Da0 = Cx^ROL32(Du1, 1); \ + Cz = Abu1^Agu1^Aku1^Amu1^Asu1; \ + Du0 = Abe0^Age0^Ake0^Ame0^Ase0; \ + Da1 = Cz^Du0; \ + Cw = Abi0^Agi0^Aki0^Ami0^Asi0; \ + Do0 = Cw^ROL32(Cz, 1); \ + Cy = Abi1^Agi1^Aki1^Ami1^Asi1; \ + Do1 = Cy^Cx; \ + Cx = Aba0^Aga0^Aka0^Ama0^Asa0; \ + De0 = Cx^ROL32(Cy, 1); \ + Cz = Aba1^Aga1^Aka1^Ama1^Asa1; \ + De1 = Cz^Cw; \ + Cy = Abo1^Ago1^Ako1^Amo1^Aso1; \ + Di0 = Du0^ROL32(Cy, 1); \ + Cw = Abo0^Ago0^Ako0^Amo0^Aso0; \ + Di1 = Du1^Cw; \ + Du0 = Cw^ROL32(Cz, 1); \ + Du1 = Cy^Cx; \ +\ + Ba = (Aba0^Da0); \ + Be = ROL32((Age0^De0), 22); \ + Bi = ROL32((Aki1^Di1), 22); \ + Bo = ROL32((Amo1^Do1), 11); \ + Bu = ROL32((Asu0^Du0), 7); \ + Aba0 = Ba ^((~Be)& Bi ); \ + Aba0 ^= *(pRoundConstants++); \ + Age0 = Be ^((~Bi)& Bo ); \ + Aki1 = Bi ^((~Bo)& Bu ); \ + Amo1 = Bo ^((~Bu)& Ba ); \ + Asu0 = Bu ^((~Ba)& Be ); \ + Ba = (Aba1^Da1); \ + Be = ROL32((Age1^De1), 22); \ + Bi = ROL32((Aki0^Di0), 21); \ + Bo = ROL32((Amo0^Do0), 10); \ + Bu = ROL32((Asu1^Du1), 7); \ + Aba1 = Ba ^((~Be)& Bi ); \ + Aba1 ^= *(pRoundConstants++); \ + Age1 = Be ^((~Bi)& Bo ); \ + Aki0 = Bi ^((~Bo)& Bu ); \ + Amo0 = Bo ^((~Bu)& Ba ); \ + Asu1 = Bu ^((~Ba)& Be ); \ + Bi = ROL32((Aka1^Da1), 2); \ + Bo = ROL32((Ame1^De1), 23); \ + Bu = ROL32((Asi1^Di1), 31); \ + Ba = ROL32((Abo0^Do0), 14); \ + Be = ROL32((Agu0^Du0), 10); \ + Aka1 = Ba ^((~Be)& Bi ); \ + Ame1 = Be ^((~Bi)& Bo ); \ + Asi1 = Bi ^((~Bo)& Bu ); \ + Abo0 = Bo ^((~Bu)& Ba ); \ + Agu0 = Bu ^((~Ba)& Be ); \ + Bi = ROL32((Aka0^Da0), 1); \ + Bo = ROL32((Ame0^De0), 22); \ + Bu = ROL32((Asi0^Di0), 30); \ + Ba = ROL32((Abo1^Do1), 14); \ + Be = ROL32((Agu1^Du1), 10); \ + Aka0 = Ba ^((~Be)& Bi ); \ + Ame0 = Be ^((~Bi)& Bo ); \ + Asi0 = Bi ^((~Bo)& Bu ); \ + Abo1 = Bo ^((~Bu)& Ba ); \ + Agu1 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Asa0^Da0), 9); \ + Ba = ROL32((Abe1^De1), 1); \ + Be = ROL32((Agi0^Di0), 3); \ + Bi = ROL32((Ako1^Do1), 13); \ + Bo = ROL32((Amu0^Du0), 4); \ + Asa0 = Ba ^((~Be)& Bi ); \ + Abe1 = Be ^((~Bi)& Bo ); \ + Agi0 = Bi ^((~Bo)& Bu ); \ + Ako1 = Bo ^((~Bu)& Ba ); \ + Amu0 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Asa1^Da1), 9); \ + Ba = (Abe0^De0); \ + Be = ROL32((Agi1^Di1), 3); \ + Bi = ROL32((Ako0^Do0), 12); \ + Bo = ROL32((Amu1^Du1), 4); \ + Asa1 = Ba ^((~Be)& Bi ); \ + Abe0 = Be ^((~Bi)& Bo ); \ + Agi1 = Bi ^((~Bo)& Bu ); \ + Ako0 = Bo ^((~Bu)& Ba ); \ + Amu1 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Aga0^Da0), 18); \ + Bi = ROL32((Ake0^De0), 5); \ + Bo = ROL32((Ami1^Di1), 8); \ + Bu = ROL32((Aso0^Do0), 28); \ + Ba = ROL32((Abu1^Du1), 14); \ + Aga0 = Ba ^((~Be)& Bi ); \ + Ake0 = Be ^((~Bi)& Bo ); \ + Ami1 = Bi ^((~Bo)& Bu ); \ + Aso0 = Bo ^((~Bu)& Ba ); \ + Abu1 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Aga1^Da1), 18); \ + Bi = ROL32((Ake1^De1), 5); \ + Bo = ROL32((Ami0^Di0), 7); \ + Bu = ROL32((Aso1^Do1), 28); \ + Ba = ROL32((Abu0^Du0), 13); \ + Aga1 = Ba ^((~Be)& Bi ); \ + Ake1 = Be ^((~Bi)& Bo ); \ + Ami0 = Bi ^((~Bo)& Bu ); \ + Aso1 = Bo ^((~Bu)& Ba ); \ + Abu0 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Ama1^Da1), 21); \ + Bu = ROL32((Ase0^De0), 1); \ + Ba = ROL32((Abi0^Di0), 31); \ + Be = ROL32((Ago1^Do1), 28); \ + Bi = ROL32((Aku1^Du1), 20); \ + Ama1 = Ba ^((~Be)& Bi ); \ + Ase0 = Be ^((~Bi)& Bo ); \ + Abi0 = Bi ^((~Bo)& Bu ); \ + Ago1 = Bo ^((~Bu)& Ba ); \ + Aku1 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Ama0^Da0), 20); \ + Bu = ROL32((Ase1^De1), 1); \ + Ba = ROL32((Abi1^Di1), 31); \ + Be = ROL32((Ago0^Do0), 27); \ + Bi = ROL32((Aku0^Du0), 19); \ + Ama0 = Ba ^((~Be)& Bi ); \ + Ase1 = Be ^((~Bi)& Bo ); \ + Abi1 = Bi ^((~Bo)& Bu ); \ + Ago0 = Bo ^((~Bu)& Ba ); \ + Aku0 = Bu ^((~Ba)& Be ) + +#define KeccakRound1() \ + Cx = Asu0^Agu0^Amu0^Abu1^Aku1; \ + Du1 = Age1^Ame0^Abe0^Ake1^Ase1; \ + Da0 = Cx^ROL32(Du1, 1); \ + Cz = Asu1^Agu1^Amu1^Abu0^Aku0; \ + Du0 = Age0^Ame1^Abe1^Ake0^Ase0; \ + Da1 = Cz^Du0; \ + Cw = Aki1^Asi1^Agi0^Ami1^Abi0; \ + Do0 = Cw^ROL32(Cz, 1); \ + Cy = Aki0^Asi0^Agi1^Ami0^Abi1; \ + Do1 = Cy^Cx; \ + Cx = Aba0^Aka1^Asa0^Aga0^Ama1; \ + De0 = Cx^ROL32(Cy, 1); \ + Cz = Aba1^Aka0^Asa1^Aga1^Ama0; \ + De1 = Cz^Cw; \ + Cy = Amo0^Abo1^Ako0^Aso1^Ago0; \ + Di0 = Du0^ROL32(Cy, 1); \ + Cw = Amo1^Abo0^Ako1^Aso0^Ago1; \ + Di1 = Du1^Cw; \ + Du0 = Cw^ROL32(Cz, 1); \ + Du1 = Cy^Cx; \ +\ + Ba = (Aba0^Da0); \ + Be = ROL32((Ame1^De0), 22); \ + Bi = ROL32((Agi1^Di1), 22); \ + Bo = ROL32((Aso1^Do1), 11); \ + Bu = ROL32((Aku1^Du0), 7); \ + Aba0 = Ba ^((~Be)& Bi ); \ + Aba0 ^= *(pRoundConstants++); \ + Ame1 = Be ^((~Bi)& Bo ); \ + Agi1 = Bi ^((~Bo)& Bu ); \ + Aso1 = Bo ^((~Bu)& Ba ); \ + Aku1 = Bu ^((~Ba)& Be ); \ + Ba = (Aba1^Da1); \ + Be = ROL32((Ame0^De1), 22); \ + Bi = ROL32((Agi0^Di0), 21); \ + Bo = ROL32((Aso0^Do0), 10); \ + Bu = ROL32((Aku0^Du1), 7); \ + Aba1 = Ba ^((~Be)& Bi ); \ + Aba1 ^= *(pRoundConstants++); \ + Ame0 = Be ^((~Bi)& Bo ); \ + Agi0 = Bi ^((~Bo)& Bu ); \ + Aso0 = Bo ^((~Bu)& Ba ); \ + Aku0 = Bu ^((~Ba)& Be ); \ + Bi = ROL32((Asa1^Da1), 2); \ + Bo = ROL32((Ake1^De1), 23); \ + Bu = ROL32((Abi1^Di1), 31); \ + Ba = ROL32((Amo1^Do0), 14); \ + Be = ROL32((Agu0^Du0), 10); \ + Asa1 = Ba ^((~Be)& Bi ); \ + Ake1 = Be ^((~Bi)& Bo ); \ + Abi1 = Bi ^((~Bo)& Bu ); \ + Amo1 = Bo ^((~Bu)& Ba ); \ + Agu0 = Bu ^((~Ba)& Be ); \ + Bi = ROL32((Asa0^Da0), 1); \ + Bo = ROL32((Ake0^De0), 22); \ + Bu = ROL32((Abi0^Di0), 30); \ + Ba = ROL32((Amo0^Do1), 14); \ + Be = ROL32((Agu1^Du1), 10); \ + Asa0 = Ba ^((~Be)& Bi ); \ + Ake0 = Be ^((~Bi)& Bo ); \ + Abi0 = Bi ^((~Bo)& Bu ); \ + Amo0 = Bo ^((~Bu)& Ba ); \ + Agu1 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Ama1^Da0), 9); \ + Ba = ROL32((Age1^De1), 1); \ + Be = ROL32((Asi1^Di0), 3); \ + Bi = ROL32((Ako0^Do1), 13); \ + Bo = ROL32((Abu1^Du0), 4); \ + Ama1 = Ba ^((~Be)& Bi ); \ + Age1 = Be ^((~Bi)& Bo ); \ + Asi1 = Bi ^((~Bo)& Bu ); \ + Ako0 = Bo ^((~Bu)& Ba ); \ + Abu1 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Ama0^Da1), 9); \ + Ba = (Age0^De0); \ + Be = ROL32((Asi0^Di1), 3); \ + Bi = ROL32((Ako1^Do0), 12); \ + Bo = ROL32((Abu0^Du1), 4); \ + Ama0 = Ba ^((~Be)& Bi ); \ + Age0 = Be ^((~Bi)& Bo ); \ + Asi0 = Bi ^((~Bo)& Bu ); \ + Ako1 = Bo ^((~Bu)& Ba ); \ + Abu0 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Aka1^Da0), 18); \ + Bi = ROL32((Abe1^De0), 5); \ + Bo = ROL32((Ami0^Di1), 8); \ + Bu = ROL32((Ago1^Do0), 28); \ + Ba = ROL32((Asu1^Du1), 14); \ + Aka1 = Ba ^((~Be)& Bi ); \ + Abe1 = Be ^((~Bi)& Bo ); \ + Ami0 = Bi ^((~Bo)& Bu ); \ + Ago1 = Bo ^((~Bu)& Ba ); \ + Asu1 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Aka0^Da1), 18); \ + Bi = ROL32((Abe0^De1), 5); \ + Bo = ROL32((Ami1^Di0), 7); \ + Bu = ROL32((Ago0^Do1), 28); \ + Ba = ROL32((Asu0^Du0), 13); \ + Aka0 = Ba ^((~Be)& Bi ); \ + Abe0 = Be ^((~Bi)& Bo ); \ + Ami1 = Bi ^((~Bo)& Bu ); \ + Ago0 = Bo ^((~Bu)& Ba ); \ + Asu0 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Aga1^Da1), 21); \ + Bu = ROL32((Ase0^De0), 1); \ + Ba = ROL32((Aki1^Di0), 31); \ + Be = ROL32((Abo1^Do1), 28); \ + Bi = ROL32((Amu1^Du1), 20); \ + Aga1 = Ba ^((~Be)& Bi ); \ + Ase0 = Be ^((~Bi)& Bo ); \ + Aki1 = Bi ^((~Bo)& Bu ); \ + Abo1 = Bo ^((~Bu)& Ba ); \ + Amu1 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Aga0^Da0), 20); \ + Bu = ROL32((Ase1^De1), 1); \ + Ba = ROL32((Aki0^Di1), 31); \ + Be = ROL32((Abo0^Do0), 27); \ + Bi = ROL32((Amu0^Du0), 19); \ + Aga0 = Ba ^((~Be)& Bi ); \ + Ase1 = Be ^((~Bi)& Bo ); \ + Aki0 = Bi ^((~Bo)& Bu ); \ + Abo0 = Bo ^((~Bu)& Ba ); \ + Amu0 = Bu ^((~Ba)& Be ); + +#define KeccakRound2() \ + Cx = Aku1^Agu0^Abu1^Asu1^Amu1; \ + Du1 = Ame0^Ake0^Age0^Abe0^Ase1; \ + Da0 = Cx^ROL32(Du1, 1); \ + Cz = Aku0^Agu1^Abu0^Asu0^Amu0; \ + Du0 = Ame1^Ake1^Age1^Abe1^Ase0; \ + Da1 = Cz^Du0; \ + Cw = Agi1^Abi1^Asi1^Ami0^Aki1; \ + Do0 = Cw^ROL32(Cz, 1); \ + Cy = Agi0^Abi0^Asi0^Ami1^Aki0; \ + Do1 = Cy^Cx; \ + Cx = Aba0^Asa1^Ama1^Aka1^Aga1; \ + De0 = Cx^ROL32(Cy, 1); \ + Cz = Aba1^Asa0^Ama0^Aka0^Aga0; \ + De1 = Cz^Cw; \ + Cy = Aso0^Amo0^Ako1^Ago0^Abo0; \ + Di0 = Du0^ROL32(Cy, 1); \ + Cw = Aso1^Amo1^Ako0^Ago1^Abo1; \ + Di1 = Du1^Cw; \ + Du0 = Cw^ROL32(Cz, 1); \ + Du1 = Cy^Cx; \ +\ + Ba = (Aba0^Da0); \ + Be = ROL32((Ake1^De0), 22); \ + Bi = ROL32((Asi0^Di1), 22); \ + Bo = ROL32((Ago0^Do1), 11); \ + Bu = ROL32((Amu1^Du0), 7); \ + Aba0 = Ba ^((~Be)& Bi ); \ + Aba0 ^= *(pRoundConstants++); \ + Ake1 = Be ^((~Bi)& Bo ); \ + Asi0 = Bi ^((~Bo)& Bu ); \ + Ago0 = Bo ^((~Bu)& Ba ); \ + Amu1 = Bu ^((~Ba)& Be ); \ + Ba = (Aba1^Da1); \ + Be = ROL32((Ake0^De1), 22); \ + Bi = ROL32((Asi1^Di0), 21); \ + Bo = ROL32((Ago1^Do0), 10); \ + Bu = ROL32((Amu0^Du1), 7); \ + Aba1 = Ba ^((~Be)& Bi ); \ + Aba1 ^= *(pRoundConstants++); \ + Ake0 = Be ^((~Bi)& Bo ); \ + Asi1 = Bi ^((~Bo)& Bu ); \ + Ago1 = Bo ^((~Bu)& Ba ); \ + Amu0 = Bu ^((~Ba)& Be ); \ + Bi = ROL32((Ama0^Da1), 2); \ + Bo = ROL32((Abe0^De1), 23); \ + Bu = ROL32((Aki0^Di1), 31); \ + Ba = ROL32((Aso1^Do0), 14); \ + Be = ROL32((Agu0^Du0), 10); \ + Ama0 = Ba ^((~Be)& Bi ); \ + Abe0 = Be ^((~Bi)& Bo ); \ + Aki0 = Bi ^((~Bo)& Bu ); \ + Aso1 = Bo ^((~Bu)& Ba ); \ + Agu0 = Bu ^((~Ba)& Be ); \ + Bi = ROL32((Ama1^Da0), 1); \ + Bo = ROL32((Abe1^De0), 22); \ + Bu = ROL32((Aki1^Di0), 30); \ + Ba = ROL32((Aso0^Do1), 14); \ + Be = ROL32((Agu1^Du1), 10); \ + Ama1 = Ba ^((~Be)& Bi ); \ + Abe1 = Be ^((~Bi)& Bo ); \ + Aki1 = Bi ^((~Bo)& Bu ); \ + Aso0 = Bo ^((~Bu)& Ba ); \ + Agu1 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Aga1^Da0), 9); \ + Ba = ROL32((Ame0^De1), 1); \ + Be = ROL32((Abi1^Di0), 3); \ + Bi = ROL32((Ako1^Do1), 13); \ + Bo = ROL32((Asu1^Du0), 4); \ + Aga1 = Ba ^((~Be)& Bi ); \ + Ame0 = Be ^((~Bi)& Bo ); \ + Abi1 = Bi ^((~Bo)& Bu ); \ + Ako1 = Bo ^((~Bu)& Ba ); \ + Asu1 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Aga0^Da1), 9); \ + Ba = (Ame1^De0); \ + Be = ROL32((Abi0^Di1), 3); \ + Bi = ROL32((Ako0^Do0), 12); \ + Bo = ROL32((Asu0^Du1), 4); \ + Aga0 = Ba ^((~Be)& Bi ); \ + Ame1 = Be ^((~Bi)& Bo ); \ + Abi0 = Bi ^((~Bo)& Bu ); \ + Ako0 = Bo ^((~Bu)& Ba ); \ + Asu0 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Asa1^Da0), 18); \ + Bi = ROL32((Age1^De0), 5); \ + Bo = ROL32((Ami1^Di1), 8); \ + Bu = ROL32((Abo1^Do0), 28); \ + Ba = ROL32((Aku0^Du1), 14); \ + Asa1 = Ba ^((~Be)& Bi ); \ + Age1 = Be ^((~Bi)& Bo ); \ + Ami1 = Bi ^((~Bo)& Bu ); \ + Abo1 = Bo ^((~Bu)& Ba ); \ + Aku0 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Asa0^Da1), 18); \ + Bi = ROL32((Age0^De1), 5); \ + Bo = ROL32((Ami0^Di0), 7); \ + Bu = ROL32((Abo0^Do1), 28); \ + Ba = ROL32((Aku1^Du0), 13); \ + Asa0 = Ba ^((~Be)& Bi ); \ + Age0 = Be ^((~Bi)& Bo ); \ + Ami0 = Bi ^((~Bo)& Bu ); \ + Abo0 = Bo ^((~Bu)& Ba ); \ + Aku1 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Aka0^Da1), 21); \ + Bu = ROL32((Ase0^De0), 1); \ + Ba = ROL32((Agi1^Di0), 31); \ + Be = ROL32((Amo0^Do1), 28); \ + Bi = ROL32((Abu0^Du1), 20); \ + Aka0 = Ba ^((~Be)& Bi ); \ + Ase0 = Be ^((~Bi)& Bo ); \ + Agi1 = Bi ^((~Bo)& Bu ); \ + Amo0 = Bo ^((~Bu)& Ba ); \ + Abu0 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Aka1^Da0), 20); \ + Bu = ROL32((Ase1^De1), 1); \ + Ba = ROL32((Agi0^Di1), 31); \ + Be = ROL32((Amo1^Do0), 27); \ + Bi = ROL32((Abu1^Du0), 19); \ + Aka1 = Ba ^((~Be)& Bi ); \ + Ase1 = Be ^((~Bi)& Bo ); \ + Agi0 = Bi ^((~Bo)& Bu ); \ + Amo1 = Bo ^((~Bu)& Ba ); \ + Abu1 = Bu ^((~Ba)& Be ); + +#define KeccakRound3() \ + Cx = Amu1^Agu0^Asu1^Aku0^Abu0; \ + Du1 = Ake0^Abe1^Ame1^Age0^Ase1; \ + Da0 = Cx^ROL32(Du1, 1); \ + Cz = Amu0^Agu1^Asu0^Aku1^Abu1; \ + Du0 = Ake1^Abe0^Ame0^Age1^Ase0; \ + Da1 = Cz^Du0; \ + Cw = Asi0^Aki0^Abi1^Ami1^Agi1; \ + Do0 = Cw^ROL32(Cz, 1); \ + Cy = Asi1^Aki1^Abi0^Ami0^Agi0; \ + Do1 = Cy^Cx; \ + Cx = Aba0^Ama0^Aga1^Asa1^Aka0; \ + De0 = Cx^ROL32(Cy, 1); \ + Cz = Aba1^Ama1^Aga0^Asa0^Aka1; \ + De1 = Cz^Cw; \ + Cy = Ago1^Aso0^Ako0^Abo0^Amo1; \ + Di0 = Du0^ROL32(Cy, 1); \ + Cw = Ago0^Aso1^Ako1^Abo1^Amo0; \ + Di1 = Du1^Cw; \ + Du0 = Cw^ROL32(Cz, 1); \ + Du1 = Cy^Cx; \ +\ + Ba = (Aba0^Da0); \ + Be = ROL32((Abe0^De0), 22); \ + Bi = ROL32((Abi0^Di1), 22); \ + Bo = ROL32((Abo0^Do1), 11); \ + Bu = ROL32((Abu0^Du0), 7); \ + Aba0 = Ba ^((~Be)& Bi ); \ + Aba0 ^= *(pRoundConstants++); \ + Abe0 = Be ^((~Bi)& Bo ); \ + Abi0 = Bi ^((~Bo)& Bu ); \ + Abo0 = Bo ^((~Bu)& Ba ); \ + Abu0 = Bu ^((~Ba)& Be ); \ + Ba = (Aba1^Da1); \ + Be = ROL32((Abe1^De1), 22); \ + Bi = ROL32((Abi1^Di0), 21); \ + Bo = ROL32((Abo1^Do0), 10); \ + Bu = ROL32((Abu1^Du1), 7); \ + Aba1 = Ba ^((~Be)& Bi ); \ + Aba1 ^= *(pRoundConstants++); \ + Abe1 = Be ^((~Bi)& Bo ); \ + Abi1 = Bi ^((~Bo)& Bu ); \ + Abo1 = Bo ^((~Bu)& Ba ); \ + Abu1 = Bu ^((~Ba)& Be ); \ + Bi = ROL32((Aga0^Da1), 2); \ + Bo = ROL32((Age0^De1), 23); \ + Bu = ROL32((Agi0^Di1), 31); \ + Ba = ROL32((Ago0^Do0), 14); \ + Be = ROL32((Agu0^Du0), 10); \ + Aga0 = Ba ^((~Be)& Bi ); \ + Age0 = Be ^((~Bi)& Bo ); \ + Agi0 = Bi ^((~Bo)& Bu ); \ + Ago0 = Bo ^((~Bu)& Ba ); \ + Agu0 = Bu ^((~Ba)& Be ); \ + Bi = ROL32((Aga1^Da0), 1); \ + Bo = ROL32((Age1^De0), 22); \ + Bu = ROL32((Agi1^Di0), 30); \ + Ba = ROL32((Ago1^Do1), 14); \ + Be = ROL32((Agu1^Du1), 10); \ + Aga1 = Ba ^((~Be)& Bi ); \ + Age1 = Be ^((~Bi)& Bo ); \ + Agi1 = Bi ^((~Bo)& Bu ); \ + Ago1 = Bo ^((~Bu)& Ba ); \ + Agu1 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Aka0^Da0), 9); \ + Ba = ROL32((Ake0^De1), 1); \ + Be = ROL32((Aki0^Di0), 3); \ + Bi = ROL32((Ako0^Do1), 13); \ + Bo = ROL32((Aku0^Du0), 4); \ + Aka0 = Ba ^((~Be)& Bi ); \ + Ake0 = Be ^((~Bi)& Bo ); \ + Aki0 = Bi ^((~Bo)& Bu ); \ + Ako0 = Bo ^((~Bu)& Ba ); \ + Aku0 = Bu ^((~Ba)& Be ); \ + Bu = ROL32((Aka1^Da1), 9); \ + Ba = (Ake1^De0); \ + Be = ROL32((Aki1^Di1), 3); \ + Bi = ROL32((Ako1^Do0), 12); \ + Bo = ROL32((Aku1^Du1), 4); \ + Aka1 = Ba ^((~Be)& Bi ); \ + Ake1 = Be ^((~Bi)& Bo ); \ + Aki1 = Bi ^((~Bo)& Bu ); \ + Ako1 = Bo ^((~Bu)& Ba ); \ + Aku1 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Ama0^Da0), 18); \ + Bi = ROL32((Ame0^De0), 5); \ + Bo = ROL32((Ami0^Di1), 8); \ + Bu = ROL32((Amo0^Do0), 28); \ + Ba = ROL32((Amu0^Du1), 14); \ + Ama0 = Ba ^((~Be)& Bi ); \ + Ame0 = Be ^((~Bi)& Bo ); \ + Ami0 = Bi ^((~Bo)& Bu ); \ + Amo0 = Bo ^((~Bu)& Ba ); \ + Amu0 = Bu ^((~Ba)& Be ); \ + Be = ROL32((Ama1^Da1), 18); \ + Bi = ROL32((Ame1^De1), 5); \ + Bo = ROL32((Ami1^Di0), 7); \ + Bu = ROL32((Amo1^Do1), 28); \ + Ba = ROL32((Amu1^Du0), 13); \ + Ama1 = Ba ^((~Be)& Bi ); \ + Ame1 = Be ^((~Bi)& Bo ); \ + Ami1 = Bi ^((~Bo)& Bu ); \ + Amo1 = Bo ^((~Bu)& Ba ); \ + Amu1 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Asa0^Da1), 21); \ + Bu = ROL32((Ase0^De0), 1); \ + Ba = ROL32((Asi0^Di0), 31); \ + Be = ROL32((Aso0^Do1), 28); \ + Bi = ROL32((Asu0^Du1), 20); \ + Asa0 = Ba ^((~Be)& Bi ); \ + Ase0 = Be ^((~Bi)& Bo ); \ + Asi0 = Bi ^((~Bo)& Bu ); \ + Aso0 = Bo ^((~Bu)& Ba ); \ + Asu0 = Bu ^((~Ba)& Be ); \ + Bo = ROL32((Asa1^Da0), 20); \ + Bu = ROL32((Ase1^De1), 1); \ + Ba = ROL32((Asi1^Di1), 31); \ + Be = ROL32((Aso1^Do0), 27); \ + Bi = ROL32((Asu1^Du0), 19); \ + Asa1 = Ba ^((~Be)& Bi ); \ + Ase1 = Be ^((~Bi)& Bo ); \ + Asi1 = Bi ^((~Bo)& Bu ); \ + Aso1 = Bo ^((~Bu)& Ba ); \ + Asu1 = Bu ^((~Ba)& Be ); + +void KeccakP1600_Permute_Nrounds(void *state, unsigned int nRounds) +{ + UINT32 Da0, De0, Di0, Do0, Du0; + UINT32 Da1, De1, Di1, Do1, Du1; + UINT32 Ba, Be, Bi, Bo, Bu; + UINT32 Cx, Cy, Cz, Cw; + const UINT32 *pRoundConstants = KeccakF1600RoundConstants_int2+(24-nRounds)*2; + UINT32 *stateAsHalfLanes = (UINT32*)state; + #define Aba0 stateAsHalfLanes[ 0] + #define Aba1 stateAsHalfLanes[ 1] + #define Abe0 stateAsHalfLanes[ 2] + #define Abe1 stateAsHalfLanes[ 3] + #define Abi0 stateAsHalfLanes[ 4] + #define Abi1 stateAsHalfLanes[ 5] + #define Abo0 stateAsHalfLanes[ 6] + #define Abo1 stateAsHalfLanes[ 7] + #define Abu0 stateAsHalfLanes[ 8] + #define Abu1 stateAsHalfLanes[ 9] + #define Aga0 stateAsHalfLanes[10] + #define Aga1 stateAsHalfLanes[11] + #define Age0 stateAsHalfLanes[12] + #define Age1 stateAsHalfLanes[13] + #define Agi0 stateAsHalfLanes[14] + #define Agi1 stateAsHalfLanes[15] + #define Ago0 stateAsHalfLanes[16] + #define Ago1 stateAsHalfLanes[17] + #define Agu0 stateAsHalfLanes[18] + #define Agu1 stateAsHalfLanes[19] + #define Aka0 stateAsHalfLanes[20] + #define Aka1 stateAsHalfLanes[21] + #define Ake0 stateAsHalfLanes[22] + #define Ake1 stateAsHalfLanes[23] + #define Aki0 stateAsHalfLanes[24] + #define Aki1 stateAsHalfLanes[25] + #define Ako0 stateAsHalfLanes[26] + #define Ako1 stateAsHalfLanes[27] + #define Aku0 stateAsHalfLanes[28] + #define Aku1 stateAsHalfLanes[29] + #define Ama0 stateAsHalfLanes[30] + #define Ama1 stateAsHalfLanes[31] + #define Ame0 stateAsHalfLanes[32] + #define Ame1 stateAsHalfLanes[33] + #define Ami0 stateAsHalfLanes[34] + #define Ami1 stateAsHalfLanes[35] + #define Amo0 stateAsHalfLanes[36] + #define Amo1 stateAsHalfLanes[37] + #define Amu0 stateAsHalfLanes[38] + #define Amu1 stateAsHalfLanes[39] + #define Asa0 stateAsHalfLanes[40] + #define Asa1 stateAsHalfLanes[41] + #define Ase0 stateAsHalfLanes[42] + #define Ase1 stateAsHalfLanes[43] + #define Asi0 stateAsHalfLanes[44] + #define Asi1 stateAsHalfLanes[45] + #define Aso0 stateAsHalfLanes[46] + #define Aso1 stateAsHalfLanes[47] + #define Asu0 stateAsHalfLanes[48] + #define Asu1 stateAsHalfLanes[49] + + nRounds &= 3; + switch ( nRounds ) + { + #define I0 Ba + #define I1 Be + #define T0 Bi + #define T1 Bo + #define SwapPI13( in0,in1,in2,in3,eo0,eo1,eo2,eo3 ) \ + I0 = (in0)[0]; I1 = (in0)[1]; \ + T0 = (in1)[0]; T1 = (in1)[1]; \ + (in0)[eo0] = T0; (in0)[eo0^1] = T1; \ + T0 = (in2)[0]; T1 = (in2)[1]; \ + (in1)[eo1] = T0; (in1)[eo1^1] = T1; \ + T0 = (in3)[0]; T1 = (in3)[1]; \ + (in2)[eo2] = T0; (in2)[eo2^1] = T1; \ + (in3)[eo3] = I0; (in3)[eo3^1] = I1 + #define SwapPI2( in0,in1,in2,in3 ) \ + I0 = (in0)[0]; I1 = (in0)[1]; \ + T0 = (in1)[0]; T1 = (in1)[1]; \ + (in0)[1] = T0; (in0)[0] = T1; \ + (in1)[1] = I0; (in1)[0] = I1; \ + I0 = (in2)[0]; I1 = (in2)[1]; \ + T0 = (in3)[0]; T1 = (in3)[1]; \ + (in2)[1] = T0; (in2)[0] = T1; \ + (in3)[1] = I0; (in3)[0] = I1 + #define SwapEO( even,odd ) T0 = even; even = odd; odd = T0 + + case 1: + SwapPI13( &Aga0, &Aka0, &Asa0, &Ama0, 1, 0, 1, 0 ); + SwapPI13( &Abe0, &Age0, &Ame0, &Ake0, 0, 1, 0, 1 ); + SwapPI13( &Abi0, &Aki0, &Agi0, &Asi0, 1, 0, 1, 0 ); + SwapEO( Ami0, Ami1 ); + SwapPI13( &Abo0, &Amo0, &Aso0, &Ago0, 1, 0, 1, 0 ); + SwapEO( Ako0, Ako1 ); + SwapPI13( &Abu0, &Asu0, &Aku0, &Amu0, 0, 1, 0, 1 ); + break; + + case 2: + SwapPI2( &Aga0, &Asa0, &Aka0, &Ama0 ); + SwapPI2( &Abe0, &Ame0, &Age0, &Ake0 ); + SwapPI2( &Abi0, &Agi0, &Aki0, &Asi0 ); + SwapPI2( &Abo0, &Aso0, &Ago0, &Amo0 ); + SwapPI2( &Abu0, &Aku0, &Amu0, &Asu0 ); + break; + + case 3: + SwapPI13( &Aga0, &Ama0, &Asa0, &Aka0, 0, 1, 0, 1 ); + SwapPI13( &Abe0, &Ake0, &Ame0, &Age0, 1, 0, 1, 0 ); + SwapPI13( &Abi0, &Asi0, &Agi0, &Aki0, 0, 1, 0, 1 ); + SwapEO( Ami0, Ami1 ); + SwapPI13( &Abo0, &Ago0, &Aso0, &Amo0, 0, 1, 0, 1 ); + SwapEO( Ako0, Ako1 ); + SwapPI13( &Abu0, &Amu0, &Aku0, &Asu0, 1, 0, 1, 0 ); + break; + #undef I0 + #undef I1 + #undef T0 + #undef T1 + #undef SwapPI13 + #undef SwapPI2 + #undef SwapEO + } + + do + { + /* Code for 4 rounds, using factor 2 interleaving, 64-bit lanes mapped to 32-bit words */ + switch ( nRounds ) + { + case 0: KeccakRound0(); /* fall through */ + case 3: KeccakRound1(); + case 2: KeccakRound2(); + case 1: KeccakRound3(); + } + nRounds = 0; + } + while ( *pRoundConstants != 0xFF ); + + #undef Aba0 + #undef Aba1 + #undef Abe0 + #undef Abe1 + #undef Abi0 + #undef Abi1 + #undef Abo0 + #undef Abo1 + #undef Abu0 + #undef Abu1 + #undef Aga0 + #undef Aga1 + #undef Age0 + #undef Age1 + #undef Agi0 + #undef Agi1 + #undef Ago0 + #undef Ago1 + #undef Agu0 + #undef Agu1 + #undef Aka0 + #undef Aka1 + #undef Ake0 + #undef Ake1 + #undef Aki0 + #undef Aki1 + #undef Ako0 + #undef Ako1 + #undef Aku0 + #undef Aku1 + #undef Ama0 + #undef Ama1 + #undef Ame0 + #undef Ame1 + #undef Ami0 + #undef Ami1 + #undef Amo0 + #undef Amo1 + #undef Amu0 + #undef Amu1 + #undef Asa0 + #undef Asa1 + #undef Ase0 + #undef Ase1 + #undef Asi0 + #undef Asi1 + #undef Aso0 + #undef Aso1 + #undef Asu0 + #undef Asu1 +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_Permute_12rounds(void *state) +{ + KeccakP1600_Permute_Nrounds(state, 12); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_Permute_24rounds(void *state) +{ + KeccakP1600_Permute_Nrounds(state, 24); +} diff --git a/ext/hash/sha3/generic32lc/KeccakSponge.c b/ext/hash/sha3/generic32lc/KeccakSponge.c new file mode 100644 index 00000000000..08d4a19f0ac --- /dev/null +++ b/ext/hash/sha3/generic32lc/KeccakSponge.c @@ -0,0 +1,110 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include "KeccakSponge.h" + +#ifdef KeccakReference + #include "displayIntermediateValues.h" +#endif + +#ifndef KeccakP200_excluded + #include "KeccakP-200-SnP.h" + + #define prefix KeccakWidth200 + #define SnP KeccakP200 + #define SnP_width 200 + #define SnP_Permute KeccakP200_Permute_18rounds + #if defined(KeccakF200_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakF200_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif + +#ifndef KeccakP400_excluded + #include "KeccakP-400-SnP.h" + + #define prefix KeccakWidth400 + #define SnP KeccakP400 + #define SnP_width 400 + #define SnP_Permute KeccakP400_Permute_20rounds + #if defined(KeccakF400_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakF400_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif + +#ifndef KeccakP800_excluded + #include "KeccakP-800-SnP.h" + + #define prefix KeccakWidth800 + #define SnP KeccakP800 + #define SnP_width 800 + #define SnP_Permute KeccakP800_Permute_22rounds + #if defined(KeccakF800_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakF800_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif + +#ifndef KeccakP1600_excluded + #include "KeccakP-1600-SnP.h" + + #define prefix KeccakWidth1600 + #define SnP KeccakP1600 + #define SnP_width 1600 + #define SnP_Permute KeccakP1600_Permute_24rounds + #if defined(KeccakF1600_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakF1600_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif + +#ifndef KeccakP1600_excluded + #include "KeccakP-1600-SnP.h" + + #define prefix KeccakWidth1600_12rounds + #define SnP KeccakP1600 + #define SnP_width 1600 + #define SnP_Permute KeccakP1600_Permute_12rounds + #if defined(KeccakP1600_12rounds_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakP1600_12rounds_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif diff --git a/ext/hash/sha3/generic32lc/KeccakSponge.h b/ext/hash/sha3/generic32lc/KeccakSponge.h new file mode 100644 index 00000000000..a8526fe749c --- /dev/null +++ b/ext/hash/sha3/generic32lc/KeccakSponge.h @@ -0,0 +1,178 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _KeccakSponge_h_ +#define _KeccakSponge_h_ + +/** General information + * + * The following type and functions are not actually implemented. Their + * documentation is generic, with the prefix Prefix replaced by + * - KeccakWidth200 for a sponge function based on Keccak-f[200] + * - KeccakWidth400 for a sponge function based on Keccak-f[400] + * - KeccakWidth800 for a sponge function based on Keccak-f[800] + * - KeccakWidth1600 for a sponge function based on Keccak-f[1600] + * + * In all these functions, the rate and capacity must sum to the width of the + * chosen permutation. For instance, to use the sponge function + * Keccak[r=1344, c=256], one must use KeccakWidth1600_Sponge() or a combination + * of KeccakWidth1600_SpongeInitialize(), KeccakWidth1600_SpongeAbsorb(), + * KeccakWidth1600_SpongeAbsorbLastFewBits() and + * KeccakWidth1600_SpongeSqueeze(). + * + * The Prefix_SpongeInstance contains the sponge instance attributes for use + * with the Prefix_Sponge* functions. + * It gathers the state processed by the permutation as well as the rate, + * the position of input/output bytes in the state and the phase + * (absorbing or squeezing). + */ + +#ifdef DontReallyInclude_DocumentationOnly +/** Function to evaluate the sponge function Keccak[r, c] in a single call. + * @param rate The value of the rate r. + * @param capacity The value of the capacity c. + * @param input Pointer to the input message (before the suffix). + * @param inputByteLen The length of the input message in bytes. + * @param suffix Byte containing from 0 to 7 suffix bits + * that must be absorbed after @a input. + * These n bits must be in the least significant bit positions. + * These bits must be delimited with a bit 1 at position n + * (counting from 0=LSB to 7=MSB) and followed by bits 0 + * from position n+1 to position 7. + * Some examples: + * - If no bits are to be absorbed, then @a suffix must be 0x01. + * - If the 2-bit sequence 0,0 is to be absorbed, @a suffix must be 0x04. + * - If the 5-bit sequence 0,1,0,0,1 is to be absorbed, @a suffix must be 0x32. + * - If the 7-bit sequence 1,1,0,1,0,0,0 is to be absorbed, @a suffix must be 0x8B. + * . + * @param output Pointer to the output buffer. + * @param outputByteLen The desired number of output bytes. + * @pre One must have r+c equal to the supported width of this implementation + * and the rate a multiple of 8 bits (one byte) in this implementation. + * @pre @a suffix ≠ 0x00 + * @return Zero if successful, 1 otherwise. + */ +int Prefix_Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen); + +/** + * Function to initialize the state of the Keccak[r, c] sponge function. + * The phase of the sponge function is set to absorbing. + * @param spongeInstance Pointer to the sponge instance to be initialized. + * @param rate The value of the rate r. + * @param capacity The value of the capacity c. + * @pre One must have r+c equal to the supported width of this implementation + * and the rate a multiple of 8 bits (one byte) in this implementation. + * @return Zero if successful, 1 otherwise. + */ +int Prefix_SpongeInitialize(Prefix_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity); + +/** + * Function to give input data bytes for the sponge function to absorb. + * @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize(). + * @param data Pointer to the input data. + * @param dataByteLen The number of input bytes provided in the input data. + * @pre The sponge function must be in the absorbing phase, + * i.e., Prefix_SpongeSqueeze() or Prefix_SpongeAbsorbLastFewBits() + * must not have been called before. + * @return Zero if successful, 1 otherwise. + */ +int Prefix_SpongeAbsorb(Prefix_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen); + +/** + * Function to give input data bits for the sponge function to absorb + * and then to switch to the squeezing phase. + * @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize(). + * @param delimitedData Byte containing from 0 to 7 trailing bits + * that must be absorbed. + * These n bits must be in the least significant bit positions. + * These bits must be delimited with a bit 1 at position n + * (counting from 0=LSB to 7=MSB) and followed by bits 0 + * from position n+1 to position 7. + * Some examples: + * - If no bits are to be absorbed, then @a delimitedData must be 0x01. + * - If the 2-bit sequence 0,0 is to be absorbed, @a delimitedData must be 0x04. + * - If the 5-bit sequence 0,1,0,0,1 is to be absorbed, @a delimitedData must be 0x32. + * - If the 7-bit sequence 1,1,0,1,0,0,0 is to be absorbed, @a delimitedData must be 0x8B. + * . + * @pre The sponge function must be in the absorbing phase, + * i.e., Prefix_SpongeSqueeze() or Prefix_SpongeAbsorbLastFewBits() + * must not have been called before. + * @pre @a delimitedData ≠ 0x00 + * @return Zero if successful, 1 otherwise. + */ +int Prefix_SpongeAbsorbLastFewBits(Prefix_SpongeInstance *spongeInstance, unsigned char delimitedData); + +/** + * Function to squeeze output data from the sponge function. + * If the sponge function was in the absorbing phase, this function + * switches it to the squeezing phase + * as if Prefix_SpongeAbsorbLastFewBits(spongeInstance, 0x01) was called. + * @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize(). + * @param data Pointer to the buffer where to store the output data. + * @param dataByteLen The number of output bytes desired. + * @return Zero if successful, 1 otherwise. + */ +int Prefix_SpongeSqueeze(Prefix_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen); +#endif + +#include +#include "align.h" + +#define KCP_DeclareSpongeStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_SpongeInstanceStruct { \ + unsigned char state[size]; \ + unsigned int rate; \ + unsigned int byteIOIndex; \ + int squeezing; \ + } prefix##_SpongeInstance; + +#define KCP_DeclareSpongeFunctions(prefix) \ + int prefix##_Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen); \ + int prefix##_SpongeInitialize(prefix##_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity); \ + int prefix##_SpongeAbsorb(prefix##_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen); \ + int prefix##_SpongeAbsorbLastFewBits(prefix##_SpongeInstance *spongeInstance, unsigned char delimitedData); \ + int prefix##_SpongeSqueeze(prefix##_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen); + +#ifndef KeccakP200_excluded + #include "KeccakP-200-SnP.h" + KCP_DeclareSpongeStructure(KeccakWidth200, KeccakP200_stateSizeInBytes, KeccakP200_stateAlignment) + KCP_DeclareSpongeFunctions(KeccakWidth200) +#endif + +#ifndef KeccakP400_excluded + #include "KeccakP-400-SnP.h" + KCP_DeclareSpongeStructure(KeccakWidth400, KeccakP400_stateSizeInBytes, KeccakP400_stateAlignment) + KCP_DeclareSpongeFunctions(KeccakWidth400) +#endif + +#ifndef KeccakP800_excluded + #include "KeccakP-800-SnP.h" + KCP_DeclareSpongeStructure(KeccakWidth800, KeccakP800_stateSizeInBytes, KeccakP800_stateAlignment) + KCP_DeclareSpongeFunctions(KeccakWidth800) +#endif + +#ifndef KeccakP1600_excluded + #include "KeccakP-1600-SnP.h" + KCP_DeclareSpongeStructure(KeccakWidth1600, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment) + KCP_DeclareSpongeFunctions(KeccakWidth1600) +#endif + +#ifndef KeccakP1600_excluded + #include "KeccakP-1600-SnP.h" + KCP_DeclareSpongeStructure(KeccakWidth1600_12rounds, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment) + KCP_DeclareSpongeFunctions(KeccakWidth1600_12rounds) +#endif + +#endif diff --git a/ext/hash/sha3/generic32lc/KeccakSponge.inc b/ext/hash/sha3/generic32lc/KeccakSponge.inc new file mode 100644 index 00000000000..42a15aac6d9 --- /dev/null +++ b/ext/hash/sha3/generic32lc/KeccakSponge.inc @@ -0,0 +1,313 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define JOIN0(a, b) a ## b +#define JOIN(a, b) JOIN0(a, b) + +#define Sponge JOIN(prefix, _Sponge) +#define SpongeInstance JOIN(prefix, _SpongeInstance) +#define SpongeInitialize JOIN(prefix, _SpongeInitialize) +#define SpongeAbsorb JOIN(prefix, _SpongeAbsorb) +#define SpongeAbsorbLastFewBits JOIN(prefix, _SpongeAbsorbLastFewBits) +#define SpongeSqueeze JOIN(prefix, _SpongeSqueeze) + +#define SnP_stateSizeInBytes JOIN(SnP, _stateSizeInBytes) +#define SnP_stateAlignment JOIN(SnP, _stateAlignment) +#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize) +#define SnP_Initialize JOIN(SnP, _Initialize) +#define SnP_AddByte JOIN(SnP, _AddByte) +#define SnP_AddBytes JOIN(SnP, _AddBytes) +#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes) + +int Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen) +{ + ALIGN(SnP_stateAlignment) unsigned char state[SnP_stateSizeInBytes]; + unsigned int partialBlock; + const unsigned char *curInput = input; + unsigned char *curOutput = output; + unsigned int rateInBytes = rate/8; + + if (rate+capacity != SnP_width) + return 1; + if ((rate <= 0) || (rate > SnP_width) || ((rate % 8) != 0)) + return 1; + if (suffix == 0) + return 1; + + /* Initialize the state */ + SnP_StaticInitialize(); + SnP_Initialize(state); + + /* First, absorb whole blocks */ +#ifdef SnP_FastLoop_Absorb + if (((rateInBytes % (SnP_width/200)) == 0) && (inputByteLen >= rateInBytes)) { + /* fast lane: whole lane rate */ + size_t j; + j = SnP_FastLoop_Absorb(state, rateInBytes/(SnP_width/200), curInput, inputByteLen); + curInput += j; + inputByteLen -= j; + } +#endif + while(inputByteLen >= (size_t)rateInBytes) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curInput, rateInBytes); + #endif + SnP_AddBytes(state, curInput, 0, rateInBytes); + SnP_Permute(state); + curInput += rateInBytes; + inputByteLen -= rateInBytes; + } + + /* Then, absorb what remains */ + partialBlock = (unsigned int)inputByteLen; + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed (part)", curInput, partialBlock); + #endif + SnP_AddBytes(state, curInput, 0, partialBlock); + + /* Finally, absorb the suffix */ + #ifdef KeccakReference + { + unsigned char delimitedData1[1]; + delimitedData1[0] = suffix; + displayBytes(1, "Block to be absorbed (last few bits + first bit of padding)", delimitedData1, 1); + } + #endif + /* Last few bits, whose delimiter coincides with first bit of padding */ + SnP_AddByte(state, suffix, partialBlock); + /* If the first bit of padding is at position rate-1, we need a whole new block for the second bit of padding */ + if ((suffix >= 0x80) && (partialBlock == (rateInBytes-1))) + SnP_Permute(state); + /* Second bit of padding */ + SnP_AddByte(state, 0x80, rateInBytes-1); + #ifdef KeccakReference + { + unsigned char block[SnP_width/8]; + memset(block, 0, SnP_width/8); + block[rateInBytes-1] = 0x80; + displayBytes(1, "Second bit of padding", block, rateInBytes); + } + #endif + SnP_Permute(state); + #ifdef KeccakReference + displayText(1, "--- Switching to squeezing phase ---"); + #endif + + /* First, output whole blocks */ + while(outputByteLen > (size_t)rateInBytes) { + SnP_ExtractBytes(state, curOutput, 0, rateInBytes); + SnP_Permute(state); + #ifdef KeccakReference + displayBytes(1, "Squeezed block", curOutput, rateInBytes); + #endif + curOutput += rateInBytes; + outputByteLen -= rateInBytes; + } + + /* Finally, output what remains */ + partialBlock = (unsigned int)outputByteLen; + SnP_ExtractBytes(state, curOutput, 0, partialBlock); + #ifdef KeccakReference + displayBytes(1, "Squeezed block (part)", curOutput, partialBlock); + #endif + + return 0; +} + +/* ---------------------------------------------------------------- */ +/* ---------------------------------------------------------------- */ +/* ---------------------------------------------------------------- */ + +int SpongeInitialize(SpongeInstance *instance, unsigned int rate, unsigned int capacity) +{ + if (rate+capacity != SnP_width) + return 1; + if ((rate <= 0) || (rate > SnP_width) || ((rate % 8) != 0)) + return 1; + SnP_StaticInitialize(); + SnP_Initialize(instance->state); + instance->rate = rate; + instance->byteIOIndex = 0; + instance->squeezing = 0; + + return 0; +} + +/* ---------------------------------------------------------------- */ + +int SpongeAbsorb(SpongeInstance *instance, const unsigned char *data, size_t dataByteLen) +{ + size_t i, j; + unsigned int partialBlock; + const unsigned char *curData; + unsigned int rateInBytes = instance->rate/8; + + if (instance->squeezing) + return 1; /* Too late for additional input */ + + i = 0; + curData = data; + while(i < dataByteLen) { + if ((instance->byteIOIndex == 0) && (dataByteLen >= (i + rateInBytes))) { +#ifdef SnP_FastLoop_Absorb + /* processing full blocks first */ + if ((rateInBytes % (SnP_width/200)) == 0) { + /* fast lane: whole lane rate */ + j = SnP_FastLoop_Absorb(instance->state, rateInBytes/(SnP_width/200), curData, dataByteLen - i); + i += j; + curData += j; + } + else { +#endif + for(j=dataByteLen-i; j>=rateInBytes; j-=rateInBytes) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curData, rateInBytes); + #endif + SnP_AddBytes(instance->state, curData, 0, rateInBytes); + SnP_Permute(instance->state); + curData+=rateInBytes; + } + i = dataByteLen - j; +#ifdef SnP_FastLoop_Absorb + } +#endif + } + else { + /* normal lane: using the message queue */ + partialBlock = (unsigned int)(dataByteLen - i); + if (partialBlock+instance->byteIOIndex > rateInBytes) + partialBlock = rateInBytes-instance->byteIOIndex; + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed (part)", curData, partialBlock); + #endif + i += partialBlock; + + SnP_AddBytes(instance->state, curData, instance->byteIOIndex, partialBlock); + curData += partialBlock; + instance->byteIOIndex += partialBlock; + if (instance->byteIOIndex == rateInBytes) { + SnP_Permute(instance->state); + instance->byteIOIndex = 0; + } + } + } + return 0; +} + +/* ---------------------------------------------------------------- */ + +int SpongeAbsorbLastFewBits(SpongeInstance *instance, unsigned char delimitedData) +{ + unsigned int rateInBytes = instance->rate/8; + + if (delimitedData == 0) + return 1; + if (instance->squeezing) + return 1; /* Too late for additional input */ + + #ifdef KeccakReference + { + unsigned char delimitedData1[1]; + delimitedData1[0] = delimitedData; + displayBytes(1, "Block to be absorbed (last few bits + first bit of padding)", delimitedData1, 1); + } + #endif + /* Last few bits, whose delimiter coincides with first bit of padding */ + SnP_AddByte(instance->state, delimitedData, instance->byteIOIndex); + /* If the first bit of padding is at position rate-1, we need a whole new block for the second bit of padding */ + if ((delimitedData >= 0x80) && (instance->byteIOIndex == (rateInBytes-1))) + SnP_Permute(instance->state); + /* Second bit of padding */ + SnP_AddByte(instance->state, 0x80, rateInBytes-1); + #ifdef KeccakReference + { + unsigned char block[SnP_width/8]; + memset(block, 0, SnP_width/8); + block[rateInBytes-1] = 0x80; + displayBytes(1, "Second bit of padding", block, rateInBytes); + } + #endif + SnP_Permute(instance->state); + instance->byteIOIndex = 0; + instance->squeezing = 1; + #ifdef KeccakReference + displayText(1, "--- Switching to squeezing phase ---"); + #endif + return 0; +} + +/* ---------------------------------------------------------------- */ + +int SpongeSqueeze(SpongeInstance *instance, unsigned char *data, size_t dataByteLen) +{ + size_t i, j; + unsigned int partialBlock; + unsigned int rateInBytes = instance->rate/8; + unsigned char *curData; + + if (!instance->squeezing) + SpongeAbsorbLastFewBits(instance, 0x01); + + i = 0; + curData = data; + while(i < dataByteLen) { + if ((instance->byteIOIndex == rateInBytes) && (dataByteLen >= (i + rateInBytes))) { + for(j=dataByteLen-i; j>=rateInBytes; j-=rateInBytes) { + SnP_Permute(instance->state); + SnP_ExtractBytes(instance->state, curData, 0, rateInBytes); + #ifdef KeccakReference + displayBytes(1, "Squeezed block", curData, rateInBytes); + #endif + curData+=rateInBytes; + } + i = dataByteLen - j; + } + else { + /* normal lane: using the message queue */ + if (instance->byteIOIndex == rateInBytes) { + SnP_Permute(instance->state); + instance->byteIOIndex = 0; + } + partialBlock = (unsigned int)(dataByteLen - i); + if (partialBlock+instance->byteIOIndex > rateInBytes) + partialBlock = rateInBytes-instance->byteIOIndex; + i += partialBlock; + + SnP_ExtractBytes(instance->state, curData, instance->byteIOIndex, partialBlock); + #ifdef KeccakReference + displayBytes(1, "Squeezed block (part)", curData, partialBlock); + #endif + curData += partialBlock; + instance->byteIOIndex += partialBlock; + } + } + return 0; +} + +/* ---------------------------------------------------------------- */ + +#undef Sponge +#undef SpongeInstance +#undef SpongeInitialize +#undef SpongeAbsorb +#undef SpongeAbsorbLastFewBits +#undef SpongeSqueeze +#undef SnP_stateSizeInBytes +#undef SnP_stateAlignment +#undef SnP_StaticInitialize +#undef SnP_Initialize +#undef SnP_AddByte +#undef SnP_AddBytes +#undef SnP_ExtractBytes diff --git a/ext/hash/sha3/generic32lc/SnP-Relaned.h b/ext/hash/sha3/generic32lc/SnP-Relaned.h new file mode 100644 index 00000000000..086e635ff8f --- /dev/null +++ b/ext/hash/sha3/generic32lc/SnP-Relaned.h @@ -0,0 +1,134 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _SnP_Relaned_h_ +#define _SnP_Relaned_h_ + +#define SnP_AddBytes(state, data, offset, length, SnP_AddLanes, SnP_AddBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_AddLanes(state, data, (length)/SnP_laneLengthInBytes); \ + SnP_AddBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + const unsigned char *_curData = (data); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_AddBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curData += _bytesInLane; \ + } \ + } \ + } + +#define SnP_OverwriteBytes(state, data, offset, length, SnP_OverwriteLanes, SnP_OverwriteBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_OverwriteLanes(state, data, (length)/SnP_laneLengthInBytes); \ + SnP_OverwriteBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + const unsigned char *_curData = (data); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_OverwriteBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curData += _bytesInLane; \ + } \ + } \ + } + +#define SnP_ExtractBytes(state, data, offset, length, SnP_ExtractLanes, SnP_ExtractBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_ExtractLanes(state, data, (length)/SnP_laneLengthInBytes); \ + SnP_ExtractBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + unsigned char *_curData = (data); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_ExtractBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curData += _bytesInLane; \ + } \ + } \ + } + +#define SnP_ExtractAndAddBytes(state, input, output, offset, length, SnP_ExtractAndAddLanes, SnP_ExtractAndAddBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_ExtractAndAddLanes(state, input, output, (length)/SnP_laneLengthInBytes); \ + SnP_ExtractAndAddBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (input)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + (output)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + const unsigned char *_curInput = (input); \ + unsigned char *_curOutput = (output); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_ExtractAndAddBytesInLane(state, _lanePosition, _curInput, _curOutput, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curInput += _bytesInLane; \ + _curOutput += _bytesInLane; \ + } \ + } \ + } + +#endif diff --git a/ext/hash/sha3/generic32lc/align.h b/ext/hash/sha3/generic32lc/align.h new file mode 100644 index 00000000000..e29771ed38d --- /dev/null +++ b/ext/hash/sha3/generic32lc/align.h @@ -0,0 +1,34 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _align_h_ +#define _align_h_ + +/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. */ +#ifdef ALIGN +#undef ALIGN +#endif + +#if defined(__GNUC__) +#define ALIGN(x) __attribute__ ((aligned(x))) +#elif defined(_MSC_VER) +#define ALIGN(x) __declspec(align(x)) +#elif defined(__ARMCC_VERSION) +#define ALIGN(x) __align(x) +#else +#define ALIGN(x) +#endif + +#endif diff --git a/ext/hash/sha3/generic32lc/brg_endian.h b/ext/hash/sha3/generic32lc/brg_endian.h new file mode 100644 index 00000000000..7226eb3bec5 --- /dev/null +++ b/ext/hash/sha3/generic32lc/brg_endian.h @@ -0,0 +1,142 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The redistribution and use of this software (with or without changes) + is allowed without the payment of fees or royalties provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 20/12/2007 + Changes for ARM 9/9/2010 +*/ + +#ifndef _BRG_ENDIAN_H +#define _BRG_ENDIAN_H + +#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ +#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ + +#if 0 +/* Include files where endian defines and byteswap functions may reside */ +#if defined( __sun ) +# include +#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ ) +# include +#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \ + defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ ) +# include +#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ ) +# if !defined( __MINGW32__ ) && !defined( _AIX ) +# include +# if !defined( __BEOS__ ) +# include +# endif +# endif +#endif +#endif + +/* Now attempt to set the define for platform byte order using any */ +/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */ +/* seem to encompass most endian symbol definitions */ + +#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN ) +# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN ) +# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( _BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( _LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN ) +# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ ) +# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +/* if the platform byte order could not be determined, then try to */ +/* set this define using common machine defines */ +#if !defined(PLATFORM_BYTE_ORDER) + +#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ + defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ + defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ + defined( vax ) || defined( vms ) || defined( VMS ) || \ + defined( __VMS ) || defined( _M_X64 ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN + +#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ + defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ + defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ + defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ + defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ + defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ + defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN + +#elif defined(__arm__) +# ifdef __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# else +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif 1 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#elif 0 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#else +# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order +#endif + +#endif + +#endif diff --git a/ext/hash/sha3/generic64lc/KeccakHash.c b/ext/hash/sha3/generic64lc/KeccakHash.c new file mode 100644 index 00000000000..259831baddc --- /dev/null +++ b/ext/hash/sha3/generic64lc/KeccakHash.c @@ -0,0 +1,80 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include +#include "KeccakHash.h" + +/* ---------------------------------------------------------------- */ + +HashReturn Keccak_HashInitialize(Keccak_HashInstance *instance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix) +{ + HashReturn result; + + if (delimitedSuffix == 0) + return FAIL; + result = (HashReturn)KeccakWidth1600_SpongeInitialize(&instance->sponge, rate, capacity); + if (result != SUCCESS) + return result; + instance->fixedOutputLength = hashbitlen; + instance->delimitedSuffix = delimitedSuffix; + return SUCCESS; +} + +/* ---------------------------------------------------------------- */ + +HashReturn Keccak_HashUpdate(Keccak_HashInstance *instance, const BitSequence *data, DataLength databitlen) +{ + if ((databitlen % 8) == 0) + return (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, data, databitlen/8); + else { + HashReturn ret = (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, data, databitlen/8); + if (ret == SUCCESS) { + /* The last partial byte is assumed to be aligned on the least significant bits */ + unsigned char lastByte = data[databitlen/8]; + /* Concatenate the last few bits provided here with those of the suffix */ + unsigned short delimitedLastBytes = (unsigned short)((unsigned short)lastByte | ((unsigned short)instance->delimitedSuffix << (databitlen % 8))); + if ((delimitedLastBytes & 0xFF00) == 0x0000) { + instance->delimitedSuffix = delimitedLastBytes & 0xFF; + } + else { + unsigned char oneByte[1]; + oneByte[0] = delimitedLastBytes & 0xFF; + ret = (HashReturn)KeccakWidth1600_SpongeAbsorb(&instance->sponge, oneByte, 1); + instance->delimitedSuffix = (delimitedLastBytes >> 8) & 0xFF; + } + } + return ret; + } +} + +/* ---------------------------------------------------------------- */ + +HashReturn Keccak_HashFinal(Keccak_HashInstance *instance, BitSequence *hashval) +{ + HashReturn ret = (HashReturn)KeccakWidth1600_SpongeAbsorbLastFewBits(&instance->sponge, instance->delimitedSuffix); + if (ret == SUCCESS) + return (HashReturn)KeccakWidth1600_SpongeSqueeze(&instance->sponge, hashval, instance->fixedOutputLength/8); + else + return ret; +} + +/* ---------------------------------------------------------------- */ + +HashReturn Keccak_HashSqueeze(Keccak_HashInstance *instance, BitSequence *data, DataLength databitlen) +{ + if ((databitlen % 8) != 0) + return FAIL; + return (HashReturn)KeccakWidth1600_SpongeSqueeze(&instance->sponge, data, databitlen/8); +} diff --git a/ext/hash/sha3/generic64lc/KeccakHash.h b/ext/hash/sha3/generic64lc/KeccakHash.h new file mode 100644 index 00000000000..ec35d3dab25 --- /dev/null +++ b/ext/hash/sha3/generic64lc/KeccakHash.h @@ -0,0 +1,113 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _KeccakHashInterface_h_ +#define _KeccakHashInterface_h_ + +#ifndef KeccakP1600_excluded + +#include "KeccakSponge.h" +#include + +typedef unsigned char BitSequence; +typedef size_t DataLength; +typedef enum { SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2 } HashReturn; + +typedef struct { + KeccakWidth1600_SpongeInstance sponge; + unsigned int fixedOutputLength; + unsigned char delimitedSuffix; +} Keccak_HashInstance; + +/** + * Function to initialize the Keccak[r, c] sponge function instance used in sequential hashing mode. + * @param hashInstance Pointer to the hash instance to be initialized. + * @param rate The value of the rate r. + * @param capacity The value of the capacity c. + * @param hashbitlen The desired number of output bits, + * or 0 for an arbitrarily-long output. + * @param delimitedSuffix Bits that will be automatically appended to the end + * of the input message, as in domain separation. + * This is a byte containing from 0 to 7 bits + * formatted like the @a delimitedData parameter of + * the Keccak_SpongeAbsorbLastFewBits() function. + * @pre One must have r+c=1600 and the rate a multiple of 8 bits in this implementation. + * @return SUCCESS if successful, FAIL otherwise. + */ +HashReturn Keccak_HashInitialize(Keccak_HashInstance *hashInstance, unsigned int rate, unsigned int capacity, unsigned int hashbitlen, unsigned char delimitedSuffix); + +/** Macro to initialize a SHAKE128 instance as specified in the FIPS 202 standard. + */ +#define Keccak_HashInitialize_SHAKE128(hashInstance) Keccak_HashInitialize(hashInstance, 1344, 256, 0, 0x1F) + +/** Macro to initialize a SHAKE256 instance as specified in the FIPS 202 standard. + */ +#define Keccak_HashInitialize_SHAKE256(hashInstance) Keccak_HashInitialize(hashInstance, 1088, 512, 0, 0x1F) + +/** Macro to initialize a SHA3-224 instance as specified in the FIPS 202 standard. + */ +#define Keccak_HashInitialize_SHA3_224(hashInstance) Keccak_HashInitialize(hashInstance, 1152, 448, 224, 0x06) + +/** Macro to initialize a SHA3-256 instance as specified in the FIPS 202 standard. + */ +#define Keccak_HashInitialize_SHA3_256(hashInstance) Keccak_HashInitialize(hashInstance, 1088, 512, 256, 0x06) + +/** Macro to initialize a SHA3-384 instance as specified in the FIPS 202 standard. + */ +#define Keccak_HashInitialize_SHA3_384(hashInstance) Keccak_HashInitialize(hashInstance, 832, 768, 384, 0x06) + +/** Macro to initialize a SHA3-512 instance as specified in the FIPS 202 standard. + */ +#define Keccak_HashInitialize_SHA3_512(hashInstance) Keccak_HashInitialize(hashInstance, 576, 1024, 512, 0x06) + +/** + * Function to give input data to be absorbed. + * @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize(). + * @param data Pointer to the input data. + * When @a databitLen is not a multiple of 8, the last bits of data must be + * in the least significant bits of the last byte (little-endian convention). + * @param databitLen The number of input bits provided in the input data. + * @pre In the previous call to Keccak_HashUpdate(), databitlen was a multiple of 8. + * @return SUCCESS if successful, FAIL otherwise. + */ +HashReturn Keccak_HashUpdate(Keccak_HashInstance *hashInstance, const BitSequence *data, DataLength databitlen); + +/** + * Function to call after all input blocks have been input and to get + * output bits if the length was specified when calling Keccak_HashInitialize(). + * @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize(). + * If @a hashbitlen was not 0 in the call to Keccak_HashInitialize(), the number of + * output bits is equal to @a hashbitlen. + * If @a hashbitlen was 0 in the call to Keccak_HashInitialize(), the output bits + * must be extracted using the Keccak_HashSqueeze() function. + * @param hashval Pointer to the buffer where to store the output data. + * @return SUCCESS if successful, FAIL otherwise. + */ +HashReturn Keccak_HashFinal(Keccak_HashInstance *hashInstance, BitSequence *hashval); + + /** + * Function to squeeze output data. + * @param hashInstance Pointer to the hash instance initialized by Keccak_HashInitialize(). + * @param data Pointer to the buffer where to store the output data. + * @param databitlen The number of output bits desired (must be a multiple of 8). + * @pre Keccak_HashFinal() must have been already called. + * @pre @a databitlen is a multiple of 8. + * @return SUCCESS if successful, FAIL otherwise. + */ +HashReturn Keccak_HashSqueeze(Keccak_HashInstance *hashInstance, BitSequence *data, DataLength databitlen); + +#endif + +#endif diff --git a/ext/hash/sha3/generic64lc/KeccakP-1600-64.macros b/ext/hash/sha3/generic64lc/KeccakP-1600-64.macros new file mode 100644 index 00000000000..d81b15256b7 --- /dev/null +++ b/ext/hash/sha3/generic64lc/KeccakP-1600-64.macros @@ -0,0 +1,2197 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define declareABCDE \ + UINT64 Aba, Abe, Abi, Abo, Abu; \ + UINT64 Aga, Age, Agi, Ago, Agu; \ + UINT64 Aka, Ake, Aki, Ako, Aku; \ + UINT64 Ama, Ame, Ami, Amo, Amu; \ + UINT64 Asa, Ase, Asi, Aso, Asu; \ + UINT64 Bba, Bbe, Bbi, Bbo, Bbu; \ + UINT64 Bga, Bge, Bgi, Bgo, Bgu; \ + UINT64 Bka, Bke, Bki, Bko, Bku; \ + UINT64 Bma, Bme, Bmi, Bmo, Bmu; \ + UINT64 Bsa, Bse, Bsi, Bso, Bsu; \ + UINT64 Ca, Ce, Ci, Co, Cu; \ + UINT64 Da, De, Di, Do, Du; \ + UINT64 Eba, Ebe, Ebi, Ebo, Ebu; \ + UINT64 Ega, Ege, Egi, Ego, Egu; \ + UINT64 Eka, Eke, Eki, Eko, Eku; \ + UINT64 Ema, Eme, Emi, Emo, Emu; \ + UINT64 Esa, Ese, Esi, Eso, Esu; \ + +#define prepareTheta \ + Ca = Aba^Aga^Aka^Ama^Asa; \ + Ce = Abe^Age^Ake^Ame^Ase; \ + Ci = Abi^Agi^Aki^Ami^Asi; \ + Co = Abo^Ago^Ako^Amo^Aso; \ + Cu = Abu^Agu^Aku^Amu^Asu; \ + +#ifdef UseBebigokimisa +/* --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') */ +/* --- 64-bit lanes mapped to 64-bit words */ +#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^( Bbe | Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + Ca = E##ba; \ + E##be = Bbe ^((~Bbi)| Bbo ); \ + Ce = E##be; \ + E##bi = Bbi ^( Bbo & Bbu ); \ + Ci = E##bi; \ + E##bo = Bbo ^( Bbu | Bba ); \ + Co = E##bo; \ + E##bu = Bbu ^( Bba & Bbe ); \ + Cu = E##bu; \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^( Bge | Bgi ); \ + Ca ^= E##ga; \ + E##ge = Bge ^( Bgi & Bgo ); \ + Ce ^= E##ge; \ + E##gi = Bgi ^( Bgo |(~Bgu)); \ + Ci ^= E##gi; \ + E##go = Bgo ^( Bgu | Bga ); \ + Co ^= E##go; \ + E##gu = Bgu ^( Bga & Bge ); \ + Cu ^= E##gu; \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^( Bke | Bki ); \ + Ca ^= E##ka; \ + E##ke = Bke ^( Bki & Bko ); \ + Ce ^= E##ke; \ + E##ki = Bki ^((~Bko)& Bku ); \ + Ci ^= E##ki; \ + E##ko = (~Bko)^( Bku | Bka ); \ + Co ^= E##ko; \ + E##ku = Bku ^( Bka & Bke ); \ + Cu ^= E##ku; \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^( Bme & Bmi ); \ + Ca ^= E##ma; \ + E##me = Bme ^( Bmi | Bmo ); \ + Ce ^= E##me; \ + E##mi = Bmi ^((~Bmo)| Bmu ); \ + Ci ^= E##mi; \ + E##mo = (~Bmo)^( Bmu & Bma ); \ + Co ^= E##mo; \ + E##mu = Bmu ^( Bma | Bme ); \ + Cu ^= E##mu; \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + Ca ^= E##sa; \ + E##se = (~Bse)^( Bsi | Bso ); \ + Ce ^= E##se; \ + E##si = Bsi ^( Bso & Bsu ); \ + Ci ^= E##si; \ + E##so = Bso ^( Bsu | Bsa ); \ + Co ^= E##so; \ + E##su = Bsu ^( Bsa & Bse ); \ + Cu ^= E##su; \ +\ + +/* --- Code for round (lane complementing pattern 'bebigokimisa') */ +/* --- 64-bit lanes mapped to 64-bit words */ +#define thetaRhoPiChiIota(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^( Bbe | Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + E##be = Bbe ^((~Bbi)| Bbo ); \ + E##bi = Bbi ^( Bbo & Bbu ); \ + E##bo = Bbo ^( Bbu | Bba ); \ + E##bu = Bbu ^( Bba & Bbe ); \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^( Bge | Bgi ); \ + E##ge = Bge ^( Bgi & Bgo ); \ + E##gi = Bgi ^( Bgo |(~Bgu)); \ + E##go = Bgo ^( Bgu | Bga ); \ + E##gu = Bgu ^( Bga & Bge ); \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^( Bke | Bki ); \ + E##ke = Bke ^( Bki & Bko ); \ + E##ki = Bki ^((~Bko)& Bku ); \ + E##ko = (~Bko)^( Bku | Bka ); \ + E##ku = Bku ^( Bka & Bke ); \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^( Bme & Bmi ); \ + E##me = Bme ^( Bmi | Bmo ); \ + E##mi = Bmi ^((~Bmo)| Bmu ); \ + E##mo = (~Bmo)^( Bmu & Bma ); \ + E##mu = Bmu ^( Bma | Bme ); \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + E##se = (~Bse)^( Bsi | Bso ); \ + E##si = Bsi ^( Bso & Bsu ); \ + E##so = Bso ^( Bsu | Bsa ); \ + E##su = Bsu ^( Bsa & Bse ); \ +\ + +#else /* UseBebigokimisa */ +/* --- Code for round, with prepare-theta */ +/* --- 64-bit lanes mapped to 64-bit words */ +#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^((~Bbe)& Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + Ca = E##ba; \ + E##be = Bbe ^((~Bbi)& Bbo ); \ + Ce = E##be; \ + E##bi = Bbi ^((~Bbo)& Bbu ); \ + Ci = E##bi; \ + E##bo = Bbo ^((~Bbu)& Bba ); \ + Co = E##bo; \ + E##bu = Bbu ^((~Bba)& Bbe ); \ + Cu = E##bu; \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^((~Bge)& Bgi ); \ + Ca ^= E##ga; \ + E##ge = Bge ^((~Bgi)& Bgo ); \ + Ce ^= E##ge; \ + E##gi = Bgi ^((~Bgo)& Bgu ); \ + Ci ^= E##gi; \ + E##go = Bgo ^((~Bgu)& Bga ); \ + Co ^= E##go; \ + E##gu = Bgu ^((~Bga)& Bge ); \ + Cu ^= E##gu; \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^((~Bke)& Bki ); \ + Ca ^= E##ka; \ + E##ke = Bke ^((~Bki)& Bko ); \ + Ce ^= E##ke; \ + E##ki = Bki ^((~Bko)& Bku ); \ + Ci ^= E##ki; \ + E##ko = Bko ^((~Bku)& Bka ); \ + Co ^= E##ko; \ + E##ku = Bku ^((~Bka)& Bke ); \ + Cu ^= E##ku; \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^((~Bme)& Bmi ); \ + Ca ^= E##ma; \ + E##me = Bme ^((~Bmi)& Bmo ); \ + Ce ^= E##me; \ + E##mi = Bmi ^((~Bmo)& Bmu ); \ + Ci ^= E##mi; \ + E##mo = Bmo ^((~Bmu)& Bma ); \ + Co ^= E##mo; \ + E##mu = Bmu ^((~Bma)& Bme ); \ + Cu ^= E##mu; \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + Ca ^= E##sa; \ + E##se = Bse ^((~Bsi)& Bso ); \ + Ce ^= E##se; \ + E##si = Bsi ^((~Bso)& Bsu ); \ + Ci ^= E##si; \ + E##so = Bso ^((~Bsu)& Bsa ); \ + Co ^= E##so; \ + E##su = Bsu ^((~Bsa)& Bse ); \ + Cu ^= E##su; \ +\ + +/* --- Code for round */ +/* --- 64-bit lanes mapped to 64-bit words */ +#define thetaRhoPiChiIota(i, A, E) \ + Da = Cu^ROL64(Ce, 1); \ + De = Ca^ROL64(Ci, 1); \ + Di = Ce^ROL64(Co, 1); \ + Do = Ci^ROL64(Cu, 1); \ + Du = Co^ROL64(Ca, 1); \ +\ + A##ba ^= Da; \ + Bba = A##ba; \ + A##ge ^= De; \ + Bbe = ROL64(A##ge, 44); \ + A##ki ^= Di; \ + Bbi = ROL64(A##ki, 43); \ + A##mo ^= Do; \ + Bbo = ROL64(A##mo, 21); \ + A##su ^= Du; \ + Bbu = ROL64(A##su, 14); \ + E##ba = Bba ^((~Bbe)& Bbi ); \ + E##ba ^= KeccakF1600RoundConstants[i]; \ + E##be = Bbe ^((~Bbi)& Bbo ); \ + E##bi = Bbi ^((~Bbo)& Bbu ); \ + E##bo = Bbo ^((~Bbu)& Bba ); \ + E##bu = Bbu ^((~Bba)& Bbe ); \ +\ + A##bo ^= Do; \ + Bga = ROL64(A##bo, 28); \ + A##gu ^= Du; \ + Bge = ROL64(A##gu, 20); \ + A##ka ^= Da; \ + Bgi = ROL64(A##ka, 3); \ + A##me ^= De; \ + Bgo = ROL64(A##me, 45); \ + A##si ^= Di; \ + Bgu = ROL64(A##si, 61); \ + E##ga = Bga ^((~Bge)& Bgi ); \ + E##ge = Bge ^((~Bgi)& Bgo ); \ + E##gi = Bgi ^((~Bgo)& Bgu ); \ + E##go = Bgo ^((~Bgu)& Bga ); \ + E##gu = Bgu ^((~Bga)& Bge ); \ +\ + A##be ^= De; \ + Bka = ROL64(A##be, 1); \ + A##gi ^= Di; \ + Bke = ROL64(A##gi, 6); \ + A##ko ^= Do; \ + Bki = ROL64(A##ko, 25); \ + A##mu ^= Du; \ + Bko = ROL64(A##mu, 8); \ + A##sa ^= Da; \ + Bku = ROL64(A##sa, 18); \ + E##ka = Bka ^((~Bke)& Bki ); \ + E##ke = Bke ^((~Bki)& Bko ); \ + E##ki = Bki ^((~Bko)& Bku ); \ + E##ko = Bko ^((~Bku)& Bka ); \ + E##ku = Bku ^((~Bka)& Bke ); \ +\ + A##bu ^= Du; \ + Bma = ROL64(A##bu, 27); \ + A##ga ^= Da; \ + Bme = ROL64(A##ga, 36); \ + A##ke ^= De; \ + Bmi = ROL64(A##ke, 10); \ + A##mi ^= Di; \ + Bmo = ROL64(A##mi, 15); \ + A##so ^= Do; \ + Bmu = ROL64(A##so, 56); \ + E##ma = Bma ^((~Bme)& Bmi ); \ + E##me = Bme ^((~Bmi)& Bmo ); \ + E##mi = Bmi ^((~Bmo)& Bmu ); \ + E##mo = Bmo ^((~Bmu)& Bma ); \ + E##mu = Bmu ^((~Bma)& Bme ); \ +\ + A##bi ^= Di; \ + Bsa = ROL64(A##bi, 62); \ + A##go ^= Do; \ + Bse = ROL64(A##go, 55); \ + A##ku ^= Du; \ + Bsi = ROL64(A##ku, 39); \ + A##ma ^= Da; \ + Bso = ROL64(A##ma, 41); \ + A##se ^= De; \ + Bsu = ROL64(A##se, 2); \ + E##sa = Bsa ^((~Bse)& Bsi ); \ + E##se = Bse ^((~Bsi)& Bso ); \ + E##si = Bsi ^((~Bso)& Bsu ); \ + E##so = Bso ^((~Bsu)& Bsa ); \ + E##su = Bsu ^((~Bsa)& Bse ); \ +\ + +#endif /* UseBebigokimisa */ + +#define copyFromState(X, state) \ + X##ba = state[ 0]; \ + X##be = state[ 1]; \ + X##bi = state[ 2]; \ + X##bo = state[ 3]; \ + X##bu = state[ 4]; \ + X##ga = state[ 5]; \ + X##ge = state[ 6]; \ + X##gi = state[ 7]; \ + X##go = state[ 8]; \ + X##gu = state[ 9]; \ + X##ka = state[10]; \ + X##ke = state[11]; \ + X##ki = state[12]; \ + X##ko = state[13]; \ + X##ku = state[14]; \ + X##ma = state[15]; \ + X##me = state[16]; \ + X##mi = state[17]; \ + X##mo = state[18]; \ + X##mu = state[19]; \ + X##sa = state[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + X##so = state[23]; \ + X##su = state[24]; \ + +#define copyToState(state, X) \ + state[ 0] = X##ba; \ + state[ 1] = X##be; \ + state[ 2] = X##bi; \ + state[ 3] = X##bo; \ + state[ 4] = X##bu; \ + state[ 5] = X##ga; \ + state[ 6] = X##ge; \ + state[ 7] = X##gi; \ + state[ 8] = X##go; \ + state[ 9] = X##gu; \ + state[10] = X##ka; \ + state[11] = X##ke; \ + state[12] = X##ki; \ + state[13] = X##ko; \ + state[14] = X##ku; \ + state[15] = X##ma; \ + state[16] = X##me; \ + state[17] = X##mi; \ + state[18] = X##mo; \ + state[19] = X##mu; \ + state[20] = X##sa; \ + state[21] = X##se; \ + state[22] = X##si; \ + state[23] = X##so; \ + state[24] = X##su; \ + +#define copyStateVariables(X, Y) \ + X##ba = Y##ba; \ + X##be = Y##be; \ + X##bi = Y##bi; \ + X##bo = Y##bo; \ + X##bu = Y##bu; \ + X##ga = Y##ga; \ + X##ge = Y##ge; \ + X##gi = Y##gi; \ + X##go = Y##go; \ + X##gu = Y##gu; \ + X##ka = Y##ka; \ + X##ke = Y##ke; \ + X##ki = Y##ki; \ + X##ko = Y##ko; \ + X##ku = Y##ku; \ + X##ma = Y##ma; \ + X##me = Y##me; \ + X##mi = Y##mi; \ + X##mo = Y##mo; \ + X##mu = Y##mu; \ + X##sa = Y##sa; \ + X##se = Y##se; \ + X##si = Y##si; \ + X##so = Y##so; \ + X##su = Y##su; \ + +#define copyFromStateAndAdd(X, state, input, laneCount) \ + if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + if (laneCount < 1) { \ + X##ba = state[ 0]; \ + } \ + else { \ + X##ba = state[ 0]^input[ 0]; \ + } \ + X##be = state[ 1]; \ + X##bi = state[ 2]; \ + } \ + else { \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + if (laneCount < 3) { \ + X##bi = state[ 2]; \ + } \ + else { \ + X##bi = state[ 2]^input[ 2]; \ + } \ + } \ + X##bo = state[ 3]; \ + X##bu = state[ 4]; \ + X##ga = state[ 5]; \ + X##ge = state[ 6]; \ + } \ + else { \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + X##bi = state[ 2]^input[ 2]; \ + X##bo = state[ 3]^input[ 3]; \ + if (laneCount < 6) { \ + if (laneCount < 5) { \ + X##bu = state[ 4]; \ + } \ + else { \ + X##bu = state[ 4]^input[ 4]; \ + } \ + X##ga = state[ 5]; \ + X##ge = state[ 6]; \ + } \ + else { \ + X##bu = state[ 4]^input[ 4]; \ + X##ga = state[ 5]^input[ 5]; \ + if (laneCount < 7) { \ + X##ge = state[ 6]; \ + } \ + else { \ + X##ge = state[ 6]^input[ 6]; \ + } \ + } \ + } \ + X##gi = state[ 7]; \ + X##go = state[ 8]; \ + X##gu = state[ 9]; \ + X##ka = state[10]; \ + X##ke = state[11]; \ + X##ki = state[12]; \ + X##ko = state[13]; \ + X##ku = state[14]; \ + } \ + else { \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + X##bi = state[ 2]^input[ 2]; \ + X##bo = state[ 3]^input[ 3]; \ + X##bu = state[ 4]^input[ 4]; \ + X##ga = state[ 5]^input[ 5]; \ + X##ge = state[ 6]^input[ 6]; \ + X##gi = state[ 7]^input[ 7]; \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + if (laneCount < 9) { \ + X##go = state[ 8]; \ + } \ + else { \ + X##go = state[ 8]^input[ 8]; \ + } \ + X##gu = state[ 9]; \ + X##ka = state[10]; \ + } \ + else { \ + X##go = state[ 8]^input[ 8]; \ + X##gu = state[ 9]^input[ 9]; \ + if (laneCount < 11) { \ + X##ka = state[10]; \ + } \ + else { \ + X##ka = state[10]^input[10]; \ + } \ + } \ + X##ke = state[11]; \ + X##ki = state[12]; \ + X##ko = state[13]; \ + X##ku = state[14]; \ + } \ + else { \ + X##go = state[ 8]^input[ 8]; \ + X##gu = state[ 9]^input[ 9]; \ + X##ka = state[10]^input[10]; \ + X##ke = state[11]^input[11]; \ + if (laneCount < 14) { \ + if (laneCount < 13) { \ + X##ki = state[12]; \ + } \ + else { \ + X##ki = state[12]^input[12]; \ + } \ + X##ko = state[13]; \ + X##ku = state[14]; \ + } \ + else { \ + X##ki = state[12]^input[12]; \ + X##ko = state[13]^input[13]; \ + if (laneCount < 15) { \ + X##ku = state[14]; \ + } \ + else { \ + X##ku = state[14]^input[14]; \ + } \ + } \ + } \ + } \ + X##ma = state[15]; \ + X##me = state[16]; \ + X##mi = state[17]; \ + X##mo = state[18]; \ + X##mu = state[19]; \ + X##sa = state[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + X##so = state[23]; \ + X##su = state[24]; \ + } \ + else { \ + X##ba = state[ 0]^input[ 0]; \ + X##be = state[ 1]^input[ 1]; \ + X##bi = state[ 2]^input[ 2]; \ + X##bo = state[ 3]^input[ 3]; \ + X##bu = state[ 4]^input[ 4]; \ + X##ga = state[ 5]^input[ 5]; \ + X##ge = state[ 6]^input[ 6]; \ + X##gi = state[ 7]^input[ 7]; \ + X##go = state[ 8]^input[ 8]; \ + X##gu = state[ 9]^input[ 9]; \ + X##ka = state[10]^input[10]; \ + X##ke = state[11]^input[11]; \ + X##ki = state[12]^input[12]; \ + X##ko = state[13]^input[13]; \ + X##ku = state[14]^input[14]; \ + X##ma = state[15]^input[15]; \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + if (laneCount < 17) { \ + X##me = state[16]; \ + } \ + else { \ + X##me = state[16]^input[16]; \ + } \ + X##mi = state[17]; \ + X##mo = state[18]; \ + } \ + else { \ + X##me = state[16]^input[16]; \ + X##mi = state[17]^input[17]; \ + if (laneCount < 19) { \ + X##mo = state[18]; \ + } \ + else { \ + X##mo = state[18]^input[18]; \ + } \ + } \ + X##mu = state[19]; \ + X##sa = state[20]; \ + X##se = state[21]; \ + X##si = state[22]; \ + } \ + else { \ + X##me = state[16]^input[16]; \ + X##mi = state[17]^input[17]; \ + X##mo = state[18]^input[18]; \ + X##mu = state[19]^input[19]; \ + if (laneCount < 22) { \ + if (laneCount < 21) { \ + X##sa = state[20]; \ + } \ + else { \ + X##sa = state[20]^input[20]; \ + } \ + X##se = state[21]; \ + X##si = state[22]; \ + } \ + else { \ + X##sa = state[20]^input[20]; \ + X##se = state[21]^input[21]; \ + if (laneCount < 23) { \ + X##si = state[22]; \ + } \ + else { \ + X##si = state[22]^input[22]; \ + } \ + } \ + } \ + X##so = state[23]; \ + X##su = state[24]; \ + } \ + else { \ + X##me = state[16]^input[16]; \ + X##mi = state[17]^input[17]; \ + X##mo = state[18]^input[18]; \ + X##mu = state[19]^input[19]; \ + X##sa = state[20]^input[20]; \ + X##se = state[21]^input[21]; \ + X##si = state[22]^input[22]; \ + X##so = state[23]^input[23]; \ + if (laneCount < 25) { \ + X##su = state[24]; \ + } \ + else { \ + X##su = state[24]^input[24]; \ + } \ + } \ + } + +#define addInput(X, input, laneCount) \ + if (laneCount == 21) { \ + X##ba ^= input[ 0]; \ + X##be ^= input[ 1]; \ + X##bi ^= input[ 2]; \ + X##bo ^= input[ 3]; \ + X##bu ^= input[ 4]; \ + X##ga ^= input[ 5]; \ + X##ge ^= input[ 6]; \ + X##gi ^= input[ 7]; \ + X##go ^= input[ 8]; \ + X##gu ^= input[ 9]; \ + X##ka ^= input[10]; \ + X##ke ^= input[11]; \ + X##ki ^= input[12]; \ + X##ko ^= input[13]; \ + X##ku ^= input[14]; \ + X##ma ^= input[15]; \ + X##me ^= input[16]; \ + X##mi ^= input[17]; \ + X##mo ^= input[18]; \ + X##mu ^= input[19]; \ + X##sa ^= input[20]; \ + } \ + else if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + if (laneCount < 1) { \ + } \ + else { \ + X##ba ^= input[ 0]; \ + } \ + } \ + else { \ + X##ba ^= input[ 0]; \ + X##be ^= input[ 1]; \ + if (laneCount < 3) { \ + } \ + else { \ + X##bi ^= input[ 2]; \ + } \ + } \ + } \ + else { \ + X##ba ^= input[ 0]; \ + X##be ^= input[ 1]; \ + X##bi ^= input[ 2]; \ + X##bo ^= input[ 3]; \ + if (laneCount < 6) { \ + if (laneCount < 5) { \ + } \ + else { \ + X##bu ^= input[ 4]; \ + } \ + } \ + else { \ + X##bu ^= input[ 4]; \ + X##ga ^= input[ 5]; \ + if (laneCount < 7) { \ + } \ + else { \ + X##ge ^= input[ 6]; \ + } \ + } \ + } \ + } \ + else { \ + X##ba ^= input[ 0]; \ + X##be ^= input[ 1]; \ + X##bi ^= input[ 2]; \ + X##bo ^= input[ 3]; \ + X##bu ^= input[ 4]; \ + X##ga ^= input[ 5]; \ + X##ge ^= input[ 6]; \ + X##gi ^= input[ 7]; \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + if (laneCount < 9) { \ + } \ + else { \ + X##go ^= input[ 8]; \ + } \ + } \ + else { \ + X##go ^= input[ 8]; \ + X##gu ^= input[ 9]; \ + if (laneCount < 11) { \ + } \ + else { \ + X##ka ^= input[10]; \ + } \ + } \ + } \ + else { \ + X##go ^= input[ 8]; \ + X##gu ^= input[ 9]; \ + X##ka ^= input[10]; \ + X##ke ^= input[11]; \ + if (laneCount < 14) { \ + if (laneCount < 13) { \ + } \ + else { \ + X##ki ^= input[12]; \ + } \ + } \ + else { \ + X##ki ^= input[12]; \ + X##ko ^= input[13]; \ + if (laneCount < 15) { \ + } \ + else { \ + X##ku ^= input[14]; \ + } \ + } \ + } \ + } \ + } \ + else { \ + X##ba ^= input[ 0]; \ + X##be ^= input[ 1]; \ + X##bi ^= input[ 2]; \ + X##bo ^= input[ 3]; \ + X##bu ^= input[ 4]; \ + X##ga ^= input[ 5]; \ + X##ge ^= input[ 6]; \ + X##gi ^= input[ 7]; \ + X##go ^= input[ 8]; \ + X##gu ^= input[ 9]; \ + X##ka ^= input[10]; \ + X##ke ^= input[11]; \ + X##ki ^= input[12]; \ + X##ko ^= input[13]; \ + X##ku ^= input[14]; \ + X##ma ^= input[15]; \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + if (laneCount < 17) { \ + } \ + else { \ + X##me ^= input[16]; \ + } \ + } \ + else { \ + X##me ^= input[16]; \ + X##mi ^= input[17]; \ + if (laneCount < 19) { \ + } \ + else { \ + X##mo ^= input[18]; \ + } \ + } \ + } \ + else { \ + X##me ^= input[16]; \ + X##mi ^= input[17]; \ + X##mo ^= input[18]; \ + X##mu ^= input[19]; \ + if (laneCount < 22) { \ + if (laneCount < 21) { \ + } \ + else { \ + X##sa ^= input[20]; \ + } \ + } \ + else { \ + X##sa ^= input[20]; \ + X##se ^= input[21]; \ + if (laneCount < 23) { \ + } \ + else { \ + X##si ^= input[22]; \ + } \ + } \ + } \ + } \ + else { \ + X##me ^= input[16]; \ + X##mi ^= input[17]; \ + X##mo ^= input[18]; \ + X##mu ^= input[19]; \ + X##sa ^= input[20]; \ + X##se ^= input[21]; \ + X##si ^= input[22]; \ + X##so ^= input[23]; \ + if (laneCount < 25) { \ + } \ + else { \ + X##su ^= input[24]; \ + } \ + } \ + } + +#ifdef UseBebigokimisa + +#define copyToStateAndOutput(X, state, output, laneCount) \ + if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + state[ 0] = X##ba; \ + if (laneCount >= 1) { \ + output[ 0] = X##ba; \ + } \ + state[ 1] = X##be; \ + state[ 2] = X##bi; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = ~X##be; \ + state[ 2] = X##bi; \ + if (laneCount >= 3) { \ + output[ 2] = ~X##bi; \ + } \ + } \ + state[ 3] = X##bo; \ + state[ 4] = X##bu; \ + state[ 5] = X##ga; \ + state[ 6] = X##ge; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = ~X##be; \ + state[ 2] = X##bi; \ + output[ 2] = ~X##bi; \ + state[ 3] = X##bo; \ + output[ 3] = X##bo; \ + if (laneCount < 6) { \ + state[ 4] = X##bu; \ + if (laneCount >= 5) { \ + output[ 4] = X##bu; \ + } \ + state[ 5] = X##ga; \ + state[ 6] = X##ge; \ + } \ + else { \ + state[ 4] = X##bu; \ + output[ 4] = X##bu; \ + state[ 5] = X##ga; \ + output[ 5] = X##ga; \ + state[ 6] = X##ge; \ + if (laneCount >= 7) { \ + output[ 6] = X##ge; \ + } \ + } \ + } \ + state[ 7] = X##gi; \ + state[ 8] = X##go; \ + state[ 9] = X##gu; \ + state[10] = X##ka; \ + state[11] = X##ke; \ + state[12] = X##ki; \ + state[13] = X##ko; \ + state[14] = X##ku; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = ~X##be; \ + state[ 2] = X##bi; \ + output[ 2] = ~X##bi; \ + state[ 3] = X##bo; \ + output[ 3] = X##bo; \ + state[ 4] = X##bu; \ + output[ 4] = X##bu; \ + state[ 5] = X##ga; \ + output[ 5] = X##ga; \ + state[ 6] = X##ge; \ + output[ 6] = X##ge; \ + state[ 7] = X##gi; \ + output[ 7] = X##gi; \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + state[ 8] = X##go; \ + if (laneCount >= 9) { \ + output[ 8] = ~X##go; \ + } \ + state[ 9] = X##gu; \ + state[10] = X##ka; \ + } \ + else { \ + state[ 8] = X##go; \ + output[ 8] = ~X##go; \ + state[ 9] = X##gu; \ + output[ 9] = X##gu; \ + state[10] = X##ka; \ + if (laneCount >= 11) { \ + output[10] = X##ka; \ + } \ + } \ + state[11] = X##ke; \ + state[12] = X##ki; \ + state[13] = X##ko; \ + state[14] = X##ku; \ + } \ + else { \ + state[ 8] = X##go; \ + output[ 8] = ~X##go; \ + state[ 9] = X##gu; \ + output[ 9] = X##gu; \ + state[10] = X##ka; \ + output[10] = X##ka; \ + state[11] = X##ke; \ + output[11] = X##ke; \ + if (laneCount < 14) { \ + state[12] = X##ki; \ + if (laneCount >= 13) { \ + output[12] = ~X##ki; \ + } \ + state[13] = X##ko; \ + state[14] = X##ku; \ + } \ + else { \ + state[12] = X##ki; \ + output[12] = ~X##ki; \ + state[13] = X##ko; \ + output[13] = X##ko; \ + state[14] = X##ku; \ + if (laneCount >= 15) { \ + output[14] = X##ku; \ + } \ + } \ + } \ + } \ + state[15] = X##ma; \ + state[16] = X##me; \ + state[17] = X##mi; \ + state[18] = X##mo; \ + state[19] = X##mu; \ + state[20] = X##sa; \ + state[21] = X##se; \ + state[22] = X##si; \ + state[23] = X##so; \ + state[24] = X##su; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = ~X##be; \ + state[ 2] = X##bi; \ + output[ 2] = ~X##bi; \ + state[ 3] = X##bo; \ + output[ 3] = X##bo; \ + state[ 4] = X##bu; \ + output[ 4] = X##bu; \ + state[ 5] = X##ga; \ + output[ 5] = X##ga; \ + state[ 6] = X##ge; \ + output[ 6] = X##ge; \ + state[ 7] = X##gi; \ + output[ 7] = X##gi; \ + state[ 8] = X##go; \ + output[ 8] = ~X##go; \ + state[ 9] = X##gu; \ + output[ 9] = X##gu; \ + state[10] = X##ka; \ + output[10] = X##ka; \ + state[11] = X##ke; \ + output[11] = X##ke; \ + state[12] = X##ki; \ + output[12] = ~X##ki; \ + state[13] = X##ko; \ + output[13] = X##ko; \ + state[14] = X##ku; \ + output[14] = X##ku; \ + state[15] = X##ma; \ + output[15] = X##ma; \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + state[16] = X##me; \ + if (laneCount >= 17) { \ + output[16] = X##me; \ + } \ + state[17] = X##mi; \ + state[18] = X##mo; \ + } \ + else { \ + state[16] = X##me; \ + output[16] = X##me; \ + state[17] = X##mi; \ + output[17] = ~X##mi; \ + state[18] = X##mo; \ + if (laneCount >= 19) { \ + output[18] = X##mo; \ + } \ + } \ + state[19] = X##mu; \ + state[20] = X##sa; \ + state[21] = X##se; \ + state[22] = X##si; \ + } \ + else { \ + state[16] = X##me; \ + output[16] = X##me; \ + state[17] = X##mi; \ + output[17] = ~X##mi; \ + state[18] = X##mo; \ + output[18] = X##mo; \ + state[19] = X##mu; \ + output[19] = X##mu; \ + if (laneCount < 22) { \ + state[20] = X##sa; \ + if (laneCount >= 21) { \ + output[20] = ~X##sa; \ + } \ + state[21] = X##se; \ + state[22] = X##si; \ + } \ + else { \ + state[20] = X##sa; \ + output[20] = ~X##sa; \ + state[21] = X##se; \ + output[21] = X##se; \ + state[22] = X##si; \ + if (laneCount >= 23) { \ + output[22] = X##si; \ + } \ + } \ + } \ + state[23] = X##so; \ + state[24] = X##su; \ + } \ + else { \ + state[16] = X##me; \ + output[16] = X##me; \ + state[17] = X##mi; \ + output[17] = ~X##mi; \ + state[18] = X##mo; \ + output[18] = X##mo; \ + state[19] = X##mu; \ + output[19] = X##mu; \ + state[20] = X##sa; \ + output[20] = ~X##sa; \ + state[21] = X##se; \ + output[21] = X##se; \ + state[22] = X##si; \ + output[22] = X##si; \ + state[23] = X##so; \ + output[23] = X##so; \ + state[24] = X##su; \ + if (laneCount >= 25) { \ + output[24] = X##su; \ + } \ + } \ + } + +#define output(X, output, laneCount) \ + if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + if (laneCount >= 1) { \ + output[ 0] = X##ba; \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = ~X##be; \ + if (laneCount >= 3) { \ + output[ 2] = ~X##bi; \ + } \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = ~X##be; \ + output[ 2] = ~X##bi; \ + output[ 3] = X##bo; \ + if (laneCount < 6) { \ + if (laneCount >= 5) { \ + output[ 4] = X##bu; \ + } \ + } \ + else { \ + output[ 4] = X##bu; \ + output[ 5] = X##ga; \ + if (laneCount >= 7) { \ + output[ 6] = X##ge; \ + } \ + } \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = ~X##be; \ + output[ 2] = ~X##bi; \ + output[ 3] = X##bo; \ + output[ 4] = X##bu; \ + output[ 5] = X##ga; \ + output[ 6] = X##ge; \ + output[ 7] = X##gi; \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + if (laneCount >= 9) { \ + output[ 8] = ~X##go; \ + } \ + } \ + else { \ + output[ 8] = ~X##go; \ + output[ 9] = X##gu; \ + if (laneCount >= 11) { \ + output[10] = X##ka; \ + } \ + } \ + } \ + else { \ + output[ 8] = ~X##go; \ + output[ 9] = X##gu; \ + output[10] = X##ka; \ + output[11] = X##ke; \ + if (laneCount < 14) { \ + if (laneCount >= 13) { \ + output[12] = ~X##ki; \ + } \ + } \ + else { \ + output[12] = ~X##ki; \ + output[13] = X##ko; \ + if (laneCount >= 15) { \ + output[14] = X##ku; \ + } \ + } \ + } \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = ~X##be; \ + output[ 2] = ~X##bi; \ + output[ 3] = X##bo; \ + output[ 4] = X##bu; \ + output[ 5] = X##ga; \ + output[ 6] = X##ge; \ + output[ 7] = X##gi; \ + output[ 8] = ~X##go; \ + output[ 9] = X##gu; \ + output[10] = X##ka; \ + output[11] = X##ke; \ + output[12] = ~X##ki; \ + output[13] = X##ko; \ + output[14] = X##ku; \ + output[15] = X##ma; \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + if (laneCount >= 17) { \ + output[16] = X##me; \ + } \ + } \ + else { \ + output[16] = X##me; \ + output[17] = ~X##mi; \ + if (laneCount >= 19) { \ + output[18] = X##mo; \ + } \ + } \ + } \ + else { \ + output[16] = X##me; \ + output[17] = ~X##mi; \ + output[18] = X##mo; \ + output[19] = X##mu; \ + if (laneCount < 22) { \ + if (laneCount >= 21) { \ + output[20] = ~X##sa; \ + } \ + } \ + else { \ + output[20] = ~X##sa; \ + output[21] = X##se; \ + if (laneCount >= 23) { \ + output[22] = X##si; \ + } \ + } \ + } \ + } \ + else { \ + output[16] = X##me; \ + output[17] = ~X##mi; \ + output[18] = X##mo; \ + output[19] = X##mu; \ + output[20] = ~X##sa; \ + output[21] = X##se; \ + output[22] = X##si; \ + output[23] = X##so; \ + if (laneCount >= 25) { \ + output[24] = X##su; \ + } \ + } \ + } + +#define wrapOne(X, input, output, index, name) \ + X##name ^= input[index]; \ + output[index] = X##name; + +#define wrapOneInvert(X, input, output, index, name) \ + X##name ^= input[index]; \ + output[index] = ~X##name; + +#define unwrapOne(X, input, output, index, name) \ + output[index] = input[index] ^ X##name; \ + X##name ^= output[index]; + +#define unwrapOneInvert(X, input, output, index, name) \ + output[index] = ~(input[index] ^ X##name); \ + X##name ^= output[index]; \ + +#else /* UseBebigokimisa */ + +#define copyToStateAndOutput(X, state, output, laneCount) \ + if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + state[ 0] = X##ba; \ + if (laneCount >= 1) { \ + output[ 0] = X##ba; \ + } \ + state[ 1] = X##be; \ + state[ 2] = X##bi; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = X##be; \ + state[ 2] = X##bi; \ + if (laneCount >= 3) { \ + output[ 2] = X##bi; \ + } \ + } \ + state[ 3] = X##bo; \ + state[ 4] = X##bu; \ + state[ 5] = X##ga; \ + state[ 6] = X##ge; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = X##be; \ + state[ 2] = X##bi; \ + output[ 2] = X##bi; \ + state[ 3] = X##bo; \ + output[ 3] = X##bo; \ + if (laneCount < 6) { \ + state[ 4] = X##bu; \ + if (laneCount >= 5) { \ + output[ 4] = X##bu; \ + } \ + state[ 5] = X##ga; \ + state[ 6] = X##ge; \ + } \ + else { \ + state[ 4] = X##bu; \ + output[ 4] = X##bu; \ + state[ 5] = X##ga; \ + output[ 5] = X##ga; \ + state[ 6] = X##ge; \ + if (laneCount >= 7) { \ + output[ 6] = X##ge; \ + } \ + } \ + } \ + state[ 7] = X##gi; \ + state[ 8] = X##go; \ + state[ 9] = X##gu; \ + state[10] = X##ka; \ + state[11] = X##ke; \ + state[12] = X##ki; \ + state[13] = X##ko; \ + state[14] = X##ku; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = X##be; \ + state[ 2] = X##bi; \ + output[ 2] = X##bi; \ + state[ 3] = X##bo; \ + output[ 3] = X##bo; \ + state[ 4] = X##bu; \ + output[ 4] = X##bu; \ + state[ 5] = X##ga; \ + output[ 5] = X##ga; \ + state[ 6] = X##ge; \ + output[ 6] = X##ge; \ + state[ 7] = X##gi; \ + output[ 7] = X##gi; \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + state[ 8] = X##go; \ + if (laneCount >= 9) { \ + output[ 8] = X##go; \ + } \ + state[ 9] = X##gu; \ + state[10] = X##ka; \ + } \ + else { \ + state[ 8] = X##go; \ + output[ 8] = X##go; \ + state[ 9] = X##gu; \ + output[ 9] = X##gu; \ + state[10] = X##ka; \ + if (laneCount >= 11) { \ + output[10] = X##ka; \ + } \ + } \ + state[11] = X##ke; \ + state[12] = X##ki; \ + state[13] = X##ko; \ + state[14] = X##ku; \ + } \ + else { \ + state[ 8] = X##go; \ + output[ 8] = X##go; \ + state[ 9] = X##gu; \ + output[ 9] = X##gu; \ + state[10] = X##ka; \ + output[10] = X##ka; \ + state[11] = X##ke; \ + output[11] = X##ke; \ + if (laneCount < 14) { \ + state[12] = X##ki; \ + if (laneCount >= 13) { \ + output[12]= X##ki; \ + } \ + state[13] = X##ko; \ + state[14] = X##ku; \ + } \ + else { \ + state[12] = X##ki; \ + output[12]= X##ki; \ + state[13] = X##ko; \ + output[13] = X##ko; \ + state[14] = X##ku; \ + if (laneCount >= 15) { \ + output[14] = X##ku; \ + } \ + } \ + } \ + } \ + state[15] = X##ma; \ + state[16] = X##me; \ + state[17] = X##mi; \ + state[18] = X##mo; \ + state[19] = X##mu; \ + state[20] = X##sa; \ + state[21] = X##se; \ + state[22] = X##si; \ + state[23] = X##so; \ + state[24] = X##su; \ + } \ + else { \ + state[ 0] = X##ba; \ + output[ 0] = X##ba; \ + state[ 1] = X##be; \ + output[ 1] = X##be; \ + state[ 2] = X##bi; \ + output[ 2] = X##bi; \ + state[ 3] = X##bo; \ + output[ 3] = X##bo; \ + state[ 4] = X##bu; \ + output[ 4] = X##bu; \ + state[ 5] = X##ga; \ + output[ 5] = X##ga; \ + state[ 6] = X##ge; \ + output[ 6] = X##ge; \ + state[ 7] = X##gi; \ + output[ 7] = X##gi; \ + state[ 8] = X##go; \ + output[ 8] = X##go; \ + state[ 9] = X##gu; \ + output[ 9] = X##gu; \ + state[10] = X##ka; \ + output[10] = X##ka; \ + state[11] = X##ke; \ + output[11] = X##ke; \ + state[12] = X##ki; \ + output[12]= X##ki; \ + state[13] = X##ko; \ + output[13] = X##ko; \ + state[14] = X##ku; \ + output[14] = X##ku; \ + state[15] = X##ma; \ + output[15] = X##ma; \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + state[16] = X##me; \ + if (laneCount >= 17) { \ + output[16] = X##me; \ + } \ + state[17] = X##mi; \ + state[18] = X##mo; \ + } \ + else { \ + state[16] = X##me; \ + output[16] = X##me; \ + state[17] = X##mi; \ + output[17] = X##mi; \ + state[18] = X##mo; \ + if (laneCount >= 19) { \ + output[18] = X##mo; \ + } \ + } \ + state[19] = X##mu; \ + state[20] = X##sa; \ + state[21] = X##se; \ + state[22] = X##si; \ + } \ + else { \ + state[16] = X##me; \ + output[16] = X##me; \ + state[17] = X##mi; \ + output[17] = X##mi; \ + state[18] = X##mo; \ + output[18] = X##mo; \ + state[19] = X##mu; \ + output[19] = X##mu; \ + if (laneCount < 22) { \ + state[20] = X##sa; \ + if (laneCount >= 21) { \ + output[20] = X##sa; \ + } \ + state[21] = X##se; \ + state[22] = X##si; \ + } \ + else { \ + state[20] = X##sa; \ + output[20] = X##sa; \ + state[21] = X##se; \ + output[21] = X##se; \ + state[22] = X##si; \ + if (laneCount >= 23) { \ + output[22] = X##si; \ + } \ + } \ + } \ + state[23] = X##so; \ + state[24] = X##su; \ + } \ + else { \ + state[16] = X##me; \ + output[16] = X##me; \ + state[17] = X##mi; \ + output[17] = X##mi; \ + state[18] = X##mo; \ + output[18] = X##mo; \ + state[19] = X##mu; \ + output[19] = X##mu; \ + state[20] = X##sa; \ + output[20] = X##sa; \ + state[21] = X##se; \ + output[21] = X##se; \ + state[22] = X##si; \ + output[22] = X##si; \ + state[23] = X##so; \ + output[23] = X##so; \ + state[24] = X##su; \ + if (laneCount >= 25) { \ + output[24] = X##su; \ + } \ + } \ + } + +#define output(X, output, laneCount) \ + if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + if (laneCount >= 1) { \ + output[ 0] = X##ba; \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = X##be; \ + if (laneCount >= 3) { \ + output[ 2] = X##bi; \ + } \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = X##be; \ + output[ 2] = X##bi; \ + output[ 3] = X##bo; \ + if (laneCount < 6) { \ + if (laneCount >= 5) { \ + output[ 4] = X##bu; \ + } \ + } \ + else { \ + output[ 4] = X##bu; \ + output[ 5] = X##ga; \ + if (laneCount >= 7) { \ + output[ 6] = X##ge; \ + } \ + } \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = X##be; \ + output[ 2] = X##bi; \ + output[ 3] = X##bo; \ + output[ 4] = X##bu; \ + output[ 5] = X##ga; \ + output[ 6] = X##ge; \ + output[ 7] = X##gi; \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + if (laneCount >= 9) { \ + output[ 8] = X##go; \ + } \ + } \ + else { \ + output[ 8] = X##go; \ + output[ 9] = X##gu; \ + if (laneCount >= 11) { \ + output[10] = X##ka; \ + } \ + } \ + } \ + else { \ + output[ 8] = X##go; \ + output[ 9] = X##gu; \ + output[10] = X##ka; \ + output[11] = X##ke; \ + if (laneCount < 14) { \ + if (laneCount >= 13) { \ + output[12] = X##ki; \ + } \ + } \ + else { \ + output[12] = X##ki; \ + output[13] = X##ko; \ + if (laneCount >= 15) { \ + output[14] = X##ku; \ + } \ + } \ + } \ + } \ + } \ + else { \ + output[ 0] = X##ba; \ + output[ 1] = X##be; \ + output[ 2] = X##bi; \ + output[ 3] = X##bo; \ + output[ 4] = X##bu; \ + output[ 5] = X##ga; \ + output[ 6] = X##ge; \ + output[ 7] = X##gi; \ + output[ 8] = X##go; \ + output[ 9] = X##gu; \ + output[10] = X##ka; \ + output[11] = X##ke; \ + output[12] = X##ki; \ + output[13] = X##ko; \ + output[14] = X##ku; \ + output[15] = X##ma; \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + if (laneCount >= 17) { \ + output[16] = X##me; \ + } \ + } \ + else { \ + output[16] = X##me; \ + output[17] = X##mi; \ + if (laneCount >= 19) { \ + output[18] = X##mo; \ + } \ + } \ + } \ + else { \ + output[16] = X##me; \ + output[17] = X##mi; \ + output[18] = X##mo; \ + output[19] = X##mu; \ + if (laneCount < 22) { \ + if (laneCount >= 21) { \ + output[20] = X##sa; \ + } \ + } \ + else { \ + output[20] = X##sa; \ + output[21] = X##se; \ + if (laneCount >= 23) { \ + output[22] = X##si; \ + } \ + } \ + } \ + } \ + else { \ + output[16] = X##me; \ + output[17] = X##mi; \ + output[18] = X##mo; \ + output[19] = X##mu; \ + output[20] = X##sa; \ + output[21] = X##se; \ + output[22] = X##si; \ + output[23] = X##so; \ + if (laneCount >= 25) { \ + output[24] = X##su; \ + } \ + } \ + } + +#define wrapOne(X, input, output, index, name) \ + X##name ^= input[index]; \ + output[index] = X##name; + +#define wrapOneInvert(X, input, output, index, name) \ + X##name ^= input[index]; \ + output[index] = X##name; + +#define unwrapOne(X, input, output, index, name) \ + output[index] = input[index] ^ X##name; \ + X##name ^= output[index]; + +#define unwrapOneInvert(X, input, output, index, name) \ + output[index] = input[index] ^ X##name; \ + X##name ^= output[index]; + +#endif + +#define wrap(X, input, output, laneCount, trailingBits) \ + if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + if (laneCount < 1) { \ + X##ba ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 0, ba) \ + X##be ^= trailingBits; \ + } \ + } \ + else { \ + wrapOne(X, input, output, 0, ba) \ + wrapOneInvert(X, input, output, 1, be) \ + if (laneCount < 3) { \ + X##bi ^= trailingBits; \ + } \ + else { \ + wrapOneInvert(X, input, output, 2, bi) \ + X##bo ^= trailingBits; \ + } \ + } \ + } \ + else { \ + wrapOne(X, input, output, 0, ba) \ + wrapOneInvert(X, input, output, 1, be) \ + wrapOneInvert(X, input, output, 2, bi) \ + wrapOne(X, input, output, 3, bo) \ + if (laneCount < 6) { \ + if (laneCount < 5) { \ + X##bu ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 4, bu) \ + X##ga ^= trailingBits; \ + } \ + } \ + else { \ + wrapOne(X, input, output, 4, bu) \ + wrapOne(X, input, output, 5, ga) \ + if (laneCount < 7) { \ + X##ge ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 6, ge) \ + X##gi ^= trailingBits; \ + } \ + } \ + } \ + } \ + else { \ + wrapOne(X, input, output, 0, ba) \ + wrapOneInvert(X, input, output, 1, be) \ + wrapOneInvert(X, input, output, 2, bi) \ + wrapOne(X, input, output, 3, bo) \ + wrapOne(X, input, output, 4, bu) \ + wrapOne(X, input, output, 5, ga) \ + wrapOne(X, input, output, 6, ge) \ + wrapOne(X, input, output, 7, gi) \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + if (laneCount < 9) { \ + X##go ^= trailingBits; \ + } \ + else { \ + wrapOneInvert(X, input, output, 8, go) \ + X##gu ^= trailingBits; \ + } \ + } \ + else { \ + wrapOneInvert(X, input, output, 8, go) \ + wrapOne(X, input, output, 9, gu) \ + if (laneCount < 11) { \ + X##ka ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 10, ka) \ + X##ke ^= trailingBits; \ + } \ + } \ + } \ + else { \ + wrapOneInvert(X, input, output, 8, go) \ + wrapOne(X, input, output, 9, gu) \ + wrapOne(X, input, output, 10, ka) \ + wrapOne(X, input, output, 11, ke) \ + if (laneCount < 14) { \ + if (laneCount < 13) { \ + X##ki ^= trailingBits; \ + } \ + else { \ + wrapOneInvert(X, input, output, 12, ki) \ + X##ko ^= trailingBits; \ + } \ + } \ + else { \ + wrapOneInvert(X, input, output, 12, ki) \ + wrapOne(X, input, output, 13, ko) \ + if (laneCount < 15) { \ + X##ku ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 14, ku) \ + X##ma ^= trailingBits; \ + } \ + } \ + } \ + } \ + } \ + else { \ + wrapOne(X, input, output, 0, ba) \ + wrapOneInvert(X, input, output, 1, be) \ + wrapOneInvert(X, input, output, 2, bi) \ + wrapOne(X, input, output, 3, bo) \ + wrapOne(X, input, output, 4, bu) \ + wrapOne(X, input, output, 5, ga) \ + wrapOne(X, input, output, 6, ge) \ + wrapOne(X, input, output, 7, gi) \ + wrapOneInvert(X, input, output, 8, go) \ + wrapOne(X, input, output, 9, gu) \ + wrapOne(X, input, output, 10, ka) \ + wrapOne(X, input, output, 11, ke) \ + wrapOneInvert(X, input, output, 12, ki) \ + wrapOne(X, input, output, 13, ko) \ + wrapOne(X, input, output, 14, ku) \ + wrapOne(X, input, output, 15, ma) \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + if (laneCount < 17) { \ + X##me ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 16, me) \ + X##mi ^= trailingBits; \ + } \ + } \ + else { \ + wrapOne(X, input, output, 16, me) \ + wrapOneInvert(X, input, output, 17, mi) \ + if (laneCount < 19) { \ + X##mo ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 18, mo) \ + X##mu ^= trailingBits; \ + } \ + } \ + } \ + else { \ + wrapOne(X, input, output, 16, me) \ + wrapOneInvert(X, input, output, 17, mi) \ + wrapOne(X, input, output, 18, mo) \ + wrapOne(X, input, output, 19, mu) \ + if (laneCount < 22) { \ + if (laneCount < 21) { \ + X##sa ^= trailingBits; \ + } \ + else { \ + wrapOneInvert(X, input, output, 20, sa) \ + X##se ^= trailingBits; \ + } \ + } \ + else { \ + wrapOneInvert(X, input, output, 20, sa) \ + wrapOne(X, input, output, 21, se) \ + if (laneCount < 23) { \ + X##si ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 22, si) \ + X##so ^= trailingBits; \ + } \ + } \ + } \ + } \ + else { \ + wrapOne(X, input, output, 16, me) \ + wrapOneInvert(X, input, output, 17, mi) \ + wrapOne(X, input, output, 18, mo) \ + wrapOne(X, input, output, 19, mu) \ + wrapOneInvert(X, input, output, 20, sa) \ + wrapOne(X, input, output, 21, se) \ + wrapOne(X, input, output, 22, si) \ + wrapOne(X, input, output, 23, so) \ + if (laneCount < 25) { \ + X##su ^= trailingBits; \ + } \ + else { \ + wrapOne(X, input, output, 24, su) \ + } \ + } \ + } + +#define unwrap(X, input, output, laneCount, trailingBits) \ + if (laneCount < 16) { \ + if (laneCount < 8) { \ + if (laneCount < 4) { \ + if (laneCount < 2) { \ + if (laneCount < 1) { \ + X##ba ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 0, ba) \ + X##be ^= trailingBits; \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 0, ba) \ + unwrapOneInvert(X, input, output, 1, be) \ + if (laneCount < 3) { \ + X##bi ^= trailingBits; \ + } \ + else { \ + unwrapOneInvert(X, input, output, 2, bi) \ + X##bo ^= trailingBits; \ + } \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 0, ba) \ + unwrapOneInvert(X, input, output, 1, be) \ + unwrapOneInvert(X, input, output, 2, bi) \ + unwrapOne(X, input, output, 3, bo) \ + if (laneCount < 6) { \ + if (laneCount < 5) { \ + X##bu ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 4, bu) \ + X##ga ^= trailingBits; \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 4, bu) \ + unwrapOne(X, input, output, 5, ga) \ + if (laneCount < 7) { \ + X##ge ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 6, ge) \ + X##gi ^= trailingBits; \ + } \ + } \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 0, ba) \ + unwrapOneInvert(X, input, output, 1, be) \ + unwrapOneInvert(X, input, output, 2, bi) \ + unwrapOne(X, input, output, 3, bo) \ + unwrapOne(X, input, output, 4, bu) \ + unwrapOne(X, input, output, 5, ga) \ + unwrapOne(X, input, output, 6, ge) \ + unwrapOne(X, input, output, 7, gi) \ + if (laneCount < 12) { \ + if (laneCount < 10) { \ + if (laneCount < 9) { \ + X##go ^= trailingBits; \ + } \ + else { \ + unwrapOneInvert(X, input, output, 8, go) \ + X##gu ^= trailingBits; \ + } \ + } \ + else { \ + unwrapOneInvert(X, input, output, 8, go) \ + unwrapOne(X, input, output, 9, gu) \ + if (laneCount < 11) { \ + X##ka ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 10, ka) \ + X##ke ^= trailingBits; \ + } \ + } \ + } \ + else { \ + unwrapOneInvert(X, input, output, 8, go) \ + unwrapOne(X, input, output, 9, gu) \ + unwrapOne(X, input, output, 10, ka) \ + unwrapOne(X, input, output, 11, ke) \ + if (laneCount < 14) { \ + if (laneCount < 13) { \ + X##ki ^= trailingBits; \ + } \ + else { \ + unwrapOneInvert(X, input, output, 12, ki) \ + X##ko ^= trailingBits; \ + } \ + } \ + else { \ + unwrapOneInvert(X, input, output, 12, ki) \ + unwrapOne(X, input, output, 13, ko) \ + if (laneCount < 15) { \ + X##ku ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 14, ku) \ + X##ma ^= trailingBits; \ + } \ + } \ + } \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 0, ba) \ + unwrapOneInvert(X, input, output, 1, be) \ + unwrapOneInvert(X, input, output, 2, bi) \ + unwrapOne(X, input, output, 3, bo) \ + unwrapOne(X, input, output, 4, bu) \ + unwrapOne(X, input, output, 5, ga) \ + unwrapOne(X, input, output, 6, ge) \ + unwrapOne(X, input, output, 7, gi) \ + unwrapOneInvert(X, input, output, 8, go) \ + unwrapOne(X, input, output, 9, gu) \ + unwrapOne(X, input, output, 10, ka) \ + unwrapOne(X, input, output, 11, ke) \ + unwrapOneInvert(X, input, output, 12, ki) \ + unwrapOne(X, input, output, 13, ko) \ + unwrapOne(X, input, output, 14, ku) \ + unwrapOne(X, input, output, 15, ma) \ + if (laneCount < 24) { \ + if (laneCount < 20) { \ + if (laneCount < 18) { \ + if (laneCount < 17) { \ + X##me ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 16, me) \ + X##mi ^= trailingBits; \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 16, me) \ + unwrapOneInvert(X, input, output, 17, mi) \ + if (laneCount < 19) { \ + X##mo ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 18, mo) \ + X##mu ^= trailingBits; \ + } \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 16, me) \ + unwrapOneInvert(X, input, output, 17, mi) \ + unwrapOne(X, input, output, 18, mo) \ + unwrapOne(X, input, output, 19, mu) \ + if (laneCount < 22) { \ + if (laneCount < 21) { \ + X##sa ^= trailingBits; \ + } \ + else { \ + unwrapOneInvert(X, input, output, 20, sa) \ + X##se ^= trailingBits; \ + } \ + } \ + else { \ + unwrapOneInvert(X, input, output, 20, sa) \ + unwrapOne(X, input, output, 21, se) \ + if (laneCount < 23) { \ + X##si ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 22, si) \ + X##so ^= trailingBits; \ + } \ + } \ + } \ + } \ + else { \ + unwrapOne(X, input, output, 16, me) \ + unwrapOneInvert(X, input, output, 17, mi) \ + unwrapOne(X, input, output, 18, mo) \ + unwrapOne(X, input, output, 19, mu) \ + unwrapOneInvert(X, input, output, 20, sa) \ + unwrapOne(X, input, output, 21, se) \ + unwrapOne(X, input, output, 22, si) \ + unwrapOne(X, input, output, 23, so) \ + if (laneCount < 25) { \ + X##su ^= trailingBits; \ + } \ + else { \ + unwrapOne(X, input, output, 24, su) \ + } \ + } \ + } diff --git a/ext/hash/sha3/generic64lc/KeccakP-1600-SnP.h b/ext/hash/sha3/generic64lc/KeccakP-1600-SnP.h new file mode 100644 index 00000000000..85c67ca9c4d --- /dev/null +++ b/ext/hash/sha3/generic64lc/KeccakP-1600-SnP.h @@ -0,0 +1,50 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _KeccakP_1600_SnP_h_ +#define _KeccakP_1600_SnP_h_ + +/** For the documentation, see SnP-documentation.h. + */ + +#include "brg_endian.h" +#include "KeccakP-1600-opt64-config.h" + +#define KeccakP1600_implementation "generic 64-bit optimized implementation (" KeccakP1600_implementation_config ")" +#define KeccakP1600_stateSizeInBytes 200 +#define KeccakP1600_stateAlignment 8 +#define KeccakF1600_FastLoop_supported + +#include + +#define KeccakP1600_StaticInitialize() +void KeccakP1600_Initialize(void *state); +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) +#define KeccakP1600_AddByte(state, byte, offset) \ + ((unsigned char*)(state))[(offset)] ^= (byte) +#else +void KeccakP1600_AddByte(void *state, unsigned char data, unsigned int offset); +#endif +void KeccakP1600_AddBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length); +void KeccakP1600_OverwriteBytes(void *state, const unsigned char *data, unsigned int offset, unsigned int length); +void KeccakP1600_OverwriteWithZeroes(void *state, unsigned int byteCount); +void KeccakP1600_Permute_Nrounds(void *state, unsigned int nrounds); +void KeccakP1600_Permute_12rounds(void *state); +void KeccakP1600_Permute_24rounds(void *state); +void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length); +void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length); +size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen); + +#endif diff --git a/ext/hash/sha3/generic64lc/KeccakP-1600-opt64-config.h b/ext/hash/sha3/generic64lc/KeccakP-1600-opt64-config.h new file mode 100644 index 00000000000..9501c64b186 --- /dev/null +++ b/ext/hash/sha3/generic64lc/KeccakP-1600-opt64-config.h @@ -0,0 +1,3 @@ +#define KeccakP1600_implementation_config "lane complementing, all rounds unrolled" +#define KeccakP1600_fullUnrolling +#define KeccakP1600_useLaneComplementing diff --git a/ext/hash/sha3/generic64lc/KeccakP-1600-opt64.c b/ext/hash/sha3/generic64lc/KeccakP-1600-opt64.c new file mode 100644 index 00000000000..40853ff9979 --- /dev/null +++ b/ext/hash/sha3/generic64lc/KeccakP-1600-opt64.c @@ -0,0 +1,484 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include +#include +#include "brg_endian.h" +#include "KeccakP-1600-opt64-config.h" + +typedef unsigned char UINT8; +typedef unsigned long long int UINT64; + +#if defined(KeccakP1600_useLaneComplementing) +#define UseBebigokimisa +#endif + +#if defined(_MSC_VER) +#define ROL64(a, offset) _rotl64(a, offset) +#elif defined(KeccakP1600_useSHLD) + #define ROL64(x,N) ({ \ + register UINT64 __out; \ + register UINT64 __in = x; \ + __asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \ + __out; \ + }) +#else +#define ROL64(a, offset) ((((UINT64)a) << offset) ^ (((UINT64)a) >> (64-offset))) +#endif + +#include "KeccakP-1600-64.macros" +#ifdef KeccakP1600_fullUnrolling +#define FullUnrolling +#else +#define Unrolling KeccakP1600_unrolling +#endif +#include "KeccakP-1600-unrolling.macros" +#include "SnP-Relaned.h" + +static const UINT64 KeccakF1600RoundConstants[24] = { + 0x0000000000000001ULL, + 0x0000000000008082ULL, + 0x800000000000808aULL, + 0x8000000080008000ULL, + 0x000000000000808bULL, + 0x0000000080000001ULL, + 0x8000000080008081ULL, + 0x8000000000008009ULL, + 0x000000000000008aULL, + 0x0000000000000088ULL, + 0x0000000080008009ULL, + 0x000000008000000aULL, + 0x000000008000808bULL, + 0x800000000000008bULL, + 0x8000000000008089ULL, + 0x8000000000008003ULL, + 0x8000000000008002ULL, + 0x8000000000000080ULL, + 0x000000000000800aULL, + 0x800000008000000aULL, + 0x8000000080008081ULL, + 0x8000000000008080ULL, + 0x0000000080000001ULL, + 0x8000000080008008ULL }; + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_Initialize(void *state) +{ + memset(state, 0, 200); +#ifdef KeccakP1600_useLaneComplementing + ((UINT64*)state)[ 1] = ~(UINT64)0; + ((UINT64*)state)[ 2] = ~(UINT64)0; + ((UINT64*)state)[ 8] = ~(UINT64)0; + ((UINT64*)state)[12] = ~(UINT64)0; + ((UINT64*)state)[17] = ~(UINT64)0; + ((UINT64*)state)[20] = ~(UINT64)0; +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_AddBytesInLane(void *state, unsigned int lanePosition, const unsigned char *data, unsigned int offset, unsigned int length) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + UINT64 lane; + if (length == 0) + return; + if (length == 1) + lane = data[0]; + else { + lane = 0; + memcpy(&lane, data, length); + } + lane <<= offset*8; +#else + UINT64 lane = 0; + unsigned int i; + for(i=0; i>= offset*8; + for(i=0; i>= 8; + } +#endif +} + +/* ---------------------------------------------------------------- */ + +#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) +void fromWordToBytes(UINT8 *bytes, const UINT64 word) +{ + unsigned int i; + + for(i=0; i<(64/8); i++) + bytes[i] = (word >> (8*i)) & 0xFF; +} +#endif + +void KeccakP1600_ExtractLanes(const void *state, unsigned char *data, unsigned int laneCount) +{ +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + memcpy(data, state, laneCount*8); +#else + unsigned int i; + + for(i=0; i 1) { + ((UINT64*)data)[ 1] = ~((UINT64*)data)[ 1]; + if (laneCount > 2) { + ((UINT64*)data)[ 2] = ~((UINT64*)data)[ 2]; + if (laneCount > 8) { + ((UINT64*)data)[ 8] = ~((UINT64*)data)[ 8]; + if (laneCount > 12) { + ((UINT64*)data)[12] = ~((UINT64*)data)[12]; + if (laneCount > 17) { + ((UINT64*)data)[17] = ~((UINT64*)data)[17]; + if (laneCount > 20) { + ((UINT64*)data)[20] = ~((UINT64*)data)[20]; + } + } + } + } + } + } +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractBytes(const void *state, unsigned char *data, unsigned int offset, unsigned int length) +{ + SnP_ExtractBytes(state, data, offset, length, KeccakP1600_ExtractLanes, KeccakP1600_ExtractBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddBytesInLane(const void *state, unsigned int lanePosition, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +{ + UINT64 lane = ((UINT64*)state)[lanePosition]; +#ifdef KeccakP1600_useLaneComplementing + if ((lanePosition == 1) || (lanePosition == 2) || (lanePosition == 8) || (lanePosition == 12) || (lanePosition == 17) || (lanePosition == 20)) + lane = ~lane; +#endif +#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) + { + unsigned int i; + UINT64 lane1[1]; + lane1[0] = lane; + for(i=0; i>= offset*8; + for(i=0; i>= 8; + } +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddLanes(const void *state, const unsigned char *input, unsigned char *output, unsigned int laneCount) +{ + unsigned int i; +#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) + unsigned char temp[8]; + unsigned int j; +#endif + + for(i=0; i 1) { + ((UINT64*)output)[ 1] = ~((UINT64*)output)[ 1]; + if (laneCount > 2) { + ((UINT64*)output)[ 2] = ~((UINT64*)output)[ 2]; + if (laneCount > 8) { + ((UINT64*)output)[ 8] = ~((UINT64*)output)[ 8]; + if (laneCount > 12) { + ((UINT64*)output)[12] = ~((UINT64*)output)[12]; + if (laneCount > 17) { + ((UINT64*)output)[17] = ~((UINT64*)output)[17]; + if (laneCount > 20) { + ((UINT64*)output)[20] = ~((UINT64*)output)[20]; + } + } + } + } + } + } +#endif +} + +/* ---------------------------------------------------------------- */ + +void KeccakP1600_ExtractAndAddBytes(const void *state, const unsigned char *input, unsigned char *output, unsigned int offset, unsigned int length) +{ + SnP_ExtractAndAddBytes(state, input, output, offset, length, KeccakP1600_ExtractAndAddLanes, KeccakP1600_ExtractAndAddBytesInLane, 8); +} + +/* ---------------------------------------------------------------- */ + +size_t KeccakF1600_FastLoop_Absorb(void *state, unsigned int laneCount, const unsigned char *data, size_t dataByteLen) +{ + size_t originalDataByteLen = dataByteLen; + declareABCDE + #ifndef KeccakP1600_fullUnrolling + unsigned int i; + #endif + UINT64 *stateAsLanes = (UINT64*)state; + UINT64 *inDataAsLanes = (UINT64*)data; + + copyFromState(A, stateAsLanes) + while(dataByteLen >= laneCount*8) { + addInput(A, inDataAsLanes, laneCount) + rounds24 + inDataAsLanes += laneCount; + dataByteLen -= laneCount*8; + } + copyToState(stateAsLanes, A) + return originalDataByteLen - dataByteLen; +} diff --git a/ext/hash/sha3/generic64lc/KeccakP-1600-unrolling.macros b/ext/hash/sha3/generic64lc/KeccakP-1600-unrolling.macros new file mode 100644 index 00000000000..3180bb06363 --- /dev/null +++ b/ext/hash/sha3/generic64lc/KeccakP-1600-unrolling.macros @@ -0,0 +1,198 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#if (defined(FullUnrolling)) +#define rounds24 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta( 0, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 1, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 2, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 3, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 4, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 5, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 6, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 7, E, A) \ + thetaRhoPiChiIotaPrepareTheta( 8, A, E) \ + thetaRhoPiChiIotaPrepareTheta( 9, E, A) \ + thetaRhoPiChiIotaPrepareTheta(10, A, E) \ + thetaRhoPiChiIotaPrepareTheta(11, E, A) \ + thetaRhoPiChiIotaPrepareTheta(12, A, E) \ + thetaRhoPiChiIotaPrepareTheta(13, E, A) \ + thetaRhoPiChiIotaPrepareTheta(14, A, E) \ + thetaRhoPiChiIotaPrepareTheta(15, E, A) \ + thetaRhoPiChiIotaPrepareTheta(16, A, E) \ + thetaRhoPiChiIotaPrepareTheta(17, E, A) \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#define rounds12 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(12, A, E) \ + thetaRhoPiChiIotaPrepareTheta(13, E, A) \ + thetaRhoPiChiIotaPrepareTheta(14, A, E) \ + thetaRhoPiChiIotaPrepareTheta(15, E, A) \ + thetaRhoPiChiIotaPrepareTheta(16, A, E) \ + thetaRhoPiChiIotaPrepareTheta(17, E, A) \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#elif (Unrolling == 12) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=12) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 3, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 4, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 5, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 6, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 7, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+ 8, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+ 9, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+10, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+11, E, A) \ + } \ + +#define rounds12 \ + prepareTheta \ + thetaRhoPiChiIotaPrepareTheta(12, A, E) \ + thetaRhoPiChiIotaPrepareTheta(13, E, A) \ + thetaRhoPiChiIotaPrepareTheta(14, A, E) \ + thetaRhoPiChiIotaPrepareTheta(15, E, A) \ + thetaRhoPiChiIotaPrepareTheta(16, A, E) \ + thetaRhoPiChiIotaPrepareTheta(17, E, A) \ + thetaRhoPiChiIotaPrepareTheta(18, A, E) \ + thetaRhoPiChiIotaPrepareTheta(19, E, A) \ + thetaRhoPiChiIotaPrepareTheta(20, A, E) \ + thetaRhoPiChiIotaPrepareTheta(21, E, A) \ + thetaRhoPiChiIotaPrepareTheta(22, A, E) \ + thetaRhoPiChiIota(23, E, A) \ + +#elif (Unrolling == 6) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=6) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i+=6) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \ + } \ + +#elif (Unrolling == 4) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=4) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i+=4) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \ + } \ + +#elif (Unrolling == 3) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=3) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i+=3) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \ + copyStateVariables(A, E) \ + } \ + +#elif (Unrolling == 2) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } \ + +#elif (Unrolling == 1) +#define rounds24 \ + prepareTheta \ + for(i=0; i<24; i++) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + copyStateVariables(A, E) \ + } \ + +#define rounds12 \ + prepareTheta \ + for(i=12; i<24; i++) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + copyStateVariables(A, E) \ + } \ + +#else +#error "Unrolling is not correctly specified!" +#endif + +#define roundsN(__nrounds) \ + prepareTheta \ + i = 24 - (__nrounds); \ + if ((i&1) != 0) { \ + thetaRhoPiChiIotaPrepareTheta(i, A, E) \ + copyStateVariables(A, E) \ + ++i; \ + } \ + for( /* empty */; i<24; i+=2) { \ + thetaRhoPiChiIotaPrepareTheta(i , A, E) \ + thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \ + } diff --git a/ext/hash/sha3/generic64lc/KeccakSponge.c b/ext/hash/sha3/generic64lc/KeccakSponge.c new file mode 100644 index 00000000000..08d4a19f0ac --- /dev/null +++ b/ext/hash/sha3/generic64lc/KeccakSponge.c @@ -0,0 +1,110 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#include "KeccakSponge.h" + +#ifdef KeccakReference + #include "displayIntermediateValues.h" +#endif + +#ifndef KeccakP200_excluded + #include "KeccakP-200-SnP.h" + + #define prefix KeccakWidth200 + #define SnP KeccakP200 + #define SnP_width 200 + #define SnP_Permute KeccakP200_Permute_18rounds + #if defined(KeccakF200_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakF200_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif + +#ifndef KeccakP400_excluded + #include "KeccakP-400-SnP.h" + + #define prefix KeccakWidth400 + #define SnP KeccakP400 + #define SnP_width 400 + #define SnP_Permute KeccakP400_Permute_20rounds + #if defined(KeccakF400_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakF400_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif + +#ifndef KeccakP800_excluded + #include "KeccakP-800-SnP.h" + + #define prefix KeccakWidth800 + #define SnP KeccakP800 + #define SnP_width 800 + #define SnP_Permute KeccakP800_Permute_22rounds + #if defined(KeccakF800_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakF800_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif + +#ifndef KeccakP1600_excluded + #include "KeccakP-1600-SnP.h" + + #define prefix KeccakWidth1600 + #define SnP KeccakP1600 + #define SnP_width 1600 + #define SnP_Permute KeccakP1600_Permute_24rounds + #if defined(KeccakF1600_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakF1600_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif + +#ifndef KeccakP1600_excluded + #include "KeccakP-1600-SnP.h" + + #define prefix KeccakWidth1600_12rounds + #define SnP KeccakP1600 + #define SnP_width 1600 + #define SnP_Permute KeccakP1600_Permute_12rounds + #if defined(KeccakP1600_12rounds_FastLoop_supported) + #define SnP_FastLoop_Absorb KeccakP1600_12rounds_FastLoop_Absorb + #endif + #include "KeccakSponge.inc" + #undef prefix + #undef SnP + #undef SnP_width + #undef SnP_Permute + #undef SnP_FastLoop_Absorb +#endif diff --git a/ext/hash/sha3/generic64lc/KeccakSponge.h b/ext/hash/sha3/generic64lc/KeccakSponge.h new file mode 100644 index 00000000000..a8526fe749c --- /dev/null +++ b/ext/hash/sha3/generic64lc/KeccakSponge.h @@ -0,0 +1,178 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _KeccakSponge_h_ +#define _KeccakSponge_h_ + +/** General information + * + * The following type and functions are not actually implemented. Their + * documentation is generic, with the prefix Prefix replaced by + * - KeccakWidth200 for a sponge function based on Keccak-f[200] + * - KeccakWidth400 for a sponge function based on Keccak-f[400] + * - KeccakWidth800 for a sponge function based on Keccak-f[800] + * - KeccakWidth1600 for a sponge function based on Keccak-f[1600] + * + * In all these functions, the rate and capacity must sum to the width of the + * chosen permutation. For instance, to use the sponge function + * Keccak[r=1344, c=256], one must use KeccakWidth1600_Sponge() or a combination + * of KeccakWidth1600_SpongeInitialize(), KeccakWidth1600_SpongeAbsorb(), + * KeccakWidth1600_SpongeAbsorbLastFewBits() and + * KeccakWidth1600_SpongeSqueeze(). + * + * The Prefix_SpongeInstance contains the sponge instance attributes for use + * with the Prefix_Sponge* functions. + * It gathers the state processed by the permutation as well as the rate, + * the position of input/output bytes in the state and the phase + * (absorbing or squeezing). + */ + +#ifdef DontReallyInclude_DocumentationOnly +/** Function to evaluate the sponge function Keccak[r, c] in a single call. + * @param rate The value of the rate r. + * @param capacity The value of the capacity c. + * @param input Pointer to the input message (before the suffix). + * @param inputByteLen The length of the input message in bytes. + * @param suffix Byte containing from 0 to 7 suffix bits + * that must be absorbed after @a input. + * These n bits must be in the least significant bit positions. + * These bits must be delimited with a bit 1 at position n + * (counting from 0=LSB to 7=MSB) and followed by bits 0 + * from position n+1 to position 7. + * Some examples: + * - If no bits are to be absorbed, then @a suffix must be 0x01. + * - If the 2-bit sequence 0,0 is to be absorbed, @a suffix must be 0x04. + * - If the 5-bit sequence 0,1,0,0,1 is to be absorbed, @a suffix must be 0x32. + * - If the 7-bit sequence 1,1,0,1,0,0,0 is to be absorbed, @a suffix must be 0x8B. + * . + * @param output Pointer to the output buffer. + * @param outputByteLen The desired number of output bytes. + * @pre One must have r+c equal to the supported width of this implementation + * and the rate a multiple of 8 bits (one byte) in this implementation. + * @pre @a suffix ≠ 0x00 + * @return Zero if successful, 1 otherwise. + */ +int Prefix_Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen); + +/** + * Function to initialize the state of the Keccak[r, c] sponge function. + * The phase of the sponge function is set to absorbing. + * @param spongeInstance Pointer to the sponge instance to be initialized. + * @param rate The value of the rate r. + * @param capacity The value of the capacity c. + * @pre One must have r+c equal to the supported width of this implementation + * and the rate a multiple of 8 bits (one byte) in this implementation. + * @return Zero if successful, 1 otherwise. + */ +int Prefix_SpongeInitialize(Prefix_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity); + +/** + * Function to give input data bytes for the sponge function to absorb. + * @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize(). + * @param data Pointer to the input data. + * @param dataByteLen The number of input bytes provided in the input data. + * @pre The sponge function must be in the absorbing phase, + * i.e., Prefix_SpongeSqueeze() or Prefix_SpongeAbsorbLastFewBits() + * must not have been called before. + * @return Zero if successful, 1 otherwise. + */ +int Prefix_SpongeAbsorb(Prefix_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen); + +/** + * Function to give input data bits for the sponge function to absorb + * and then to switch to the squeezing phase. + * @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize(). + * @param delimitedData Byte containing from 0 to 7 trailing bits + * that must be absorbed. + * These n bits must be in the least significant bit positions. + * These bits must be delimited with a bit 1 at position n + * (counting from 0=LSB to 7=MSB) and followed by bits 0 + * from position n+1 to position 7. + * Some examples: + * - If no bits are to be absorbed, then @a delimitedData must be 0x01. + * - If the 2-bit sequence 0,0 is to be absorbed, @a delimitedData must be 0x04. + * - If the 5-bit sequence 0,1,0,0,1 is to be absorbed, @a delimitedData must be 0x32. + * - If the 7-bit sequence 1,1,0,1,0,0,0 is to be absorbed, @a delimitedData must be 0x8B. + * . + * @pre The sponge function must be in the absorbing phase, + * i.e., Prefix_SpongeSqueeze() or Prefix_SpongeAbsorbLastFewBits() + * must not have been called before. + * @pre @a delimitedData ≠ 0x00 + * @return Zero if successful, 1 otherwise. + */ +int Prefix_SpongeAbsorbLastFewBits(Prefix_SpongeInstance *spongeInstance, unsigned char delimitedData); + +/** + * Function to squeeze output data from the sponge function. + * If the sponge function was in the absorbing phase, this function + * switches it to the squeezing phase + * as if Prefix_SpongeAbsorbLastFewBits(spongeInstance, 0x01) was called. + * @param spongeInstance Pointer to the sponge instance initialized by Prefix_SpongeInitialize(). + * @param data Pointer to the buffer where to store the output data. + * @param dataByteLen The number of output bytes desired. + * @return Zero if successful, 1 otherwise. + */ +int Prefix_SpongeSqueeze(Prefix_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen); +#endif + +#include +#include "align.h" + +#define KCP_DeclareSpongeStructure(prefix, size, alignment) \ + ALIGN(alignment) typedef struct prefix##_SpongeInstanceStruct { \ + unsigned char state[size]; \ + unsigned int rate; \ + unsigned int byteIOIndex; \ + int squeezing; \ + } prefix##_SpongeInstance; + +#define KCP_DeclareSpongeFunctions(prefix) \ + int prefix##_Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen); \ + int prefix##_SpongeInitialize(prefix##_SpongeInstance *spongeInstance, unsigned int rate, unsigned int capacity); \ + int prefix##_SpongeAbsorb(prefix##_SpongeInstance *spongeInstance, const unsigned char *data, size_t dataByteLen); \ + int prefix##_SpongeAbsorbLastFewBits(prefix##_SpongeInstance *spongeInstance, unsigned char delimitedData); \ + int prefix##_SpongeSqueeze(prefix##_SpongeInstance *spongeInstance, unsigned char *data, size_t dataByteLen); + +#ifndef KeccakP200_excluded + #include "KeccakP-200-SnP.h" + KCP_DeclareSpongeStructure(KeccakWidth200, KeccakP200_stateSizeInBytes, KeccakP200_stateAlignment) + KCP_DeclareSpongeFunctions(KeccakWidth200) +#endif + +#ifndef KeccakP400_excluded + #include "KeccakP-400-SnP.h" + KCP_DeclareSpongeStructure(KeccakWidth400, KeccakP400_stateSizeInBytes, KeccakP400_stateAlignment) + KCP_DeclareSpongeFunctions(KeccakWidth400) +#endif + +#ifndef KeccakP800_excluded + #include "KeccakP-800-SnP.h" + KCP_DeclareSpongeStructure(KeccakWidth800, KeccakP800_stateSizeInBytes, KeccakP800_stateAlignment) + KCP_DeclareSpongeFunctions(KeccakWidth800) +#endif + +#ifndef KeccakP1600_excluded + #include "KeccakP-1600-SnP.h" + KCP_DeclareSpongeStructure(KeccakWidth1600, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment) + KCP_DeclareSpongeFunctions(KeccakWidth1600) +#endif + +#ifndef KeccakP1600_excluded + #include "KeccakP-1600-SnP.h" + KCP_DeclareSpongeStructure(KeccakWidth1600_12rounds, KeccakP1600_stateSizeInBytes, KeccakP1600_stateAlignment) + KCP_DeclareSpongeFunctions(KeccakWidth1600_12rounds) +#endif + +#endif diff --git a/ext/hash/sha3/generic64lc/KeccakSponge.inc b/ext/hash/sha3/generic64lc/KeccakSponge.inc new file mode 100644 index 00000000000..42a15aac6d9 --- /dev/null +++ b/ext/hash/sha3/generic64lc/KeccakSponge.inc @@ -0,0 +1,313 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#define JOIN0(a, b) a ## b +#define JOIN(a, b) JOIN0(a, b) + +#define Sponge JOIN(prefix, _Sponge) +#define SpongeInstance JOIN(prefix, _SpongeInstance) +#define SpongeInitialize JOIN(prefix, _SpongeInitialize) +#define SpongeAbsorb JOIN(prefix, _SpongeAbsorb) +#define SpongeAbsorbLastFewBits JOIN(prefix, _SpongeAbsorbLastFewBits) +#define SpongeSqueeze JOIN(prefix, _SpongeSqueeze) + +#define SnP_stateSizeInBytes JOIN(SnP, _stateSizeInBytes) +#define SnP_stateAlignment JOIN(SnP, _stateAlignment) +#define SnP_StaticInitialize JOIN(SnP, _StaticInitialize) +#define SnP_Initialize JOIN(SnP, _Initialize) +#define SnP_AddByte JOIN(SnP, _AddByte) +#define SnP_AddBytes JOIN(SnP, _AddBytes) +#define SnP_ExtractBytes JOIN(SnP, _ExtractBytes) + +int Sponge(unsigned int rate, unsigned int capacity, const unsigned char *input, size_t inputByteLen, unsigned char suffix, unsigned char *output, size_t outputByteLen) +{ + ALIGN(SnP_stateAlignment) unsigned char state[SnP_stateSizeInBytes]; + unsigned int partialBlock; + const unsigned char *curInput = input; + unsigned char *curOutput = output; + unsigned int rateInBytes = rate/8; + + if (rate+capacity != SnP_width) + return 1; + if ((rate <= 0) || (rate > SnP_width) || ((rate % 8) != 0)) + return 1; + if (suffix == 0) + return 1; + + /* Initialize the state */ + SnP_StaticInitialize(); + SnP_Initialize(state); + + /* First, absorb whole blocks */ +#ifdef SnP_FastLoop_Absorb + if (((rateInBytes % (SnP_width/200)) == 0) && (inputByteLen >= rateInBytes)) { + /* fast lane: whole lane rate */ + size_t j; + j = SnP_FastLoop_Absorb(state, rateInBytes/(SnP_width/200), curInput, inputByteLen); + curInput += j; + inputByteLen -= j; + } +#endif + while(inputByteLen >= (size_t)rateInBytes) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curInput, rateInBytes); + #endif + SnP_AddBytes(state, curInput, 0, rateInBytes); + SnP_Permute(state); + curInput += rateInBytes; + inputByteLen -= rateInBytes; + } + + /* Then, absorb what remains */ + partialBlock = (unsigned int)inputByteLen; + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed (part)", curInput, partialBlock); + #endif + SnP_AddBytes(state, curInput, 0, partialBlock); + + /* Finally, absorb the suffix */ + #ifdef KeccakReference + { + unsigned char delimitedData1[1]; + delimitedData1[0] = suffix; + displayBytes(1, "Block to be absorbed (last few bits + first bit of padding)", delimitedData1, 1); + } + #endif + /* Last few bits, whose delimiter coincides with first bit of padding */ + SnP_AddByte(state, suffix, partialBlock); + /* If the first bit of padding is at position rate-1, we need a whole new block for the second bit of padding */ + if ((suffix >= 0x80) && (partialBlock == (rateInBytes-1))) + SnP_Permute(state); + /* Second bit of padding */ + SnP_AddByte(state, 0x80, rateInBytes-1); + #ifdef KeccakReference + { + unsigned char block[SnP_width/8]; + memset(block, 0, SnP_width/8); + block[rateInBytes-1] = 0x80; + displayBytes(1, "Second bit of padding", block, rateInBytes); + } + #endif + SnP_Permute(state); + #ifdef KeccakReference + displayText(1, "--- Switching to squeezing phase ---"); + #endif + + /* First, output whole blocks */ + while(outputByteLen > (size_t)rateInBytes) { + SnP_ExtractBytes(state, curOutput, 0, rateInBytes); + SnP_Permute(state); + #ifdef KeccakReference + displayBytes(1, "Squeezed block", curOutput, rateInBytes); + #endif + curOutput += rateInBytes; + outputByteLen -= rateInBytes; + } + + /* Finally, output what remains */ + partialBlock = (unsigned int)outputByteLen; + SnP_ExtractBytes(state, curOutput, 0, partialBlock); + #ifdef KeccakReference + displayBytes(1, "Squeezed block (part)", curOutput, partialBlock); + #endif + + return 0; +} + +/* ---------------------------------------------------------------- */ +/* ---------------------------------------------------------------- */ +/* ---------------------------------------------------------------- */ + +int SpongeInitialize(SpongeInstance *instance, unsigned int rate, unsigned int capacity) +{ + if (rate+capacity != SnP_width) + return 1; + if ((rate <= 0) || (rate > SnP_width) || ((rate % 8) != 0)) + return 1; + SnP_StaticInitialize(); + SnP_Initialize(instance->state); + instance->rate = rate; + instance->byteIOIndex = 0; + instance->squeezing = 0; + + return 0; +} + +/* ---------------------------------------------------------------- */ + +int SpongeAbsorb(SpongeInstance *instance, const unsigned char *data, size_t dataByteLen) +{ + size_t i, j; + unsigned int partialBlock; + const unsigned char *curData; + unsigned int rateInBytes = instance->rate/8; + + if (instance->squeezing) + return 1; /* Too late for additional input */ + + i = 0; + curData = data; + while(i < dataByteLen) { + if ((instance->byteIOIndex == 0) && (dataByteLen >= (i + rateInBytes))) { +#ifdef SnP_FastLoop_Absorb + /* processing full blocks first */ + if ((rateInBytes % (SnP_width/200)) == 0) { + /* fast lane: whole lane rate */ + j = SnP_FastLoop_Absorb(instance->state, rateInBytes/(SnP_width/200), curData, dataByteLen - i); + i += j; + curData += j; + } + else { +#endif + for(j=dataByteLen-i; j>=rateInBytes; j-=rateInBytes) { + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed", curData, rateInBytes); + #endif + SnP_AddBytes(instance->state, curData, 0, rateInBytes); + SnP_Permute(instance->state); + curData+=rateInBytes; + } + i = dataByteLen - j; +#ifdef SnP_FastLoop_Absorb + } +#endif + } + else { + /* normal lane: using the message queue */ + partialBlock = (unsigned int)(dataByteLen - i); + if (partialBlock+instance->byteIOIndex > rateInBytes) + partialBlock = rateInBytes-instance->byteIOIndex; + #ifdef KeccakReference + displayBytes(1, "Block to be absorbed (part)", curData, partialBlock); + #endif + i += partialBlock; + + SnP_AddBytes(instance->state, curData, instance->byteIOIndex, partialBlock); + curData += partialBlock; + instance->byteIOIndex += partialBlock; + if (instance->byteIOIndex == rateInBytes) { + SnP_Permute(instance->state); + instance->byteIOIndex = 0; + } + } + } + return 0; +} + +/* ---------------------------------------------------------------- */ + +int SpongeAbsorbLastFewBits(SpongeInstance *instance, unsigned char delimitedData) +{ + unsigned int rateInBytes = instance->rate/8; + + if (delimitedData == 0) + return 1; + if (instance->squeezing) + return 1; /* Too late for additional input */ + + #ifdef KeccakReference + { + unsigned char delimitedData1[1]; + delimitedData1[0] = delimitedData; + displayBytes(1, "Block to be absorbed (last few bits + first bit of padding)", delimitedData1, 1); + } + #endif + /* Last few bits, whose delimiter coincides with first bit of padding */ + SnP_AddByte(instance->state, delimitedData, instance->byteIOIndex); + /* If the first bit of padding is at position rate-1, we need a whole new block for the second bit of padding */ + if ((delimitedData >= 0x80) && (instance->byteIOIndex == (rateInBytes-1))) + SnP_Permute(instance->state); + /* Second bit of padding */ + SnP_AddByte(instance->state, 0x80, rateInBytes-1); + #ifdef KeccakReference + { + unsigned char block[SnP_width/8]; + memset(block, 0, SnP_width/8); + block[rateInBytes-1] = 0x80; + displayBytes(1, "Second bit of padding", block, rateInBytes); + } + #endif + SnP_Permute(instance->state); + instance->byteIOIndex = 0; + instance->squeezing = 1; + #ifdef KeccakReference + displayText(1, "--- Switching to squeezing phase ---"); + #endif + return 0; +} + +/* ---------------------------------------------------------------- */ + +int SpongeSqueeze(SpongeInstance *instance, unsigned char *data, size_t dataByteLen) +{ + size_t i, j; + unsigned int partialBlock; + unsigned int rateInBytes = instance->rate/8; + unsigned char *curData; + + if (!instance->squeezing) + SpongeAbsorbLastFewBits(instance, 0x01); + + i = 0; + curData = data; + while(i < dataByteLen) { + if ((instance->byteIOIndex == rateInBytes) && (dataByteLen >= (i + rateInBytes))) { + for(j=dataByteLen-i; j>=rateInBytes; j-=rateInBytes) { + SnP_Permute(instance->state); + SnP_ExtractBytes(instance->state, curData, 0, rateInBytes); + #ifdef KeccakReference + displayBytes(1, "Squeezed block", curData, rateInBytes); + #endif + curData+=rateInBytes; + } + i = dataByteLen - j; + } + else { + /* normal lane: using the message queue */ + if (instance->byteIOIndex == rateInBytes) { + SnP_Permute(instance->state); + instance->byteIOIndex = 0; + } + partialBlock = (unsigned int)(dataByteLen - i); + if (partialBlock+instance->byteIOIndex > rateInBytes) + partialBlock = rateInBytes-instance->byteIOIndex; + i += partialBlock; + + SnP_ExtractBytes(instance->state, curData, instance->byteIOIndex, partialBlock); + #ifdef KeccakReference + displayBytes(1, "Squeezed block (part)", curData, partialBlock); + #endif + curData += partialBlock; + instance->byteIOIndex += partialBlock; + } + } + return 0; +} + +/* ---------------------------------------------------------------- */ + +#undef Sponge +#undef SpongeInstance +#undef SpongeInitialize +#undef SpongeAbsorb +#undef SpongeAbsorbLastFewBits +#undef SpongeSqueeze +#undef SnP_stateSizeInBytes +#undef SnP_stateAlignment +#undef SnP_StaticInitialize +#undef SnP_Initialize +#undef SnP_AddByte +#undef SnP_AddBytes +#undef SnP_ExtractBytes diff --git a/ext/hash/sha3/generic64lc/SnP-Relaned.h b/ext/hash/sha3/generic64lc/SnP-Relaned.h new file mode 100644 index 00000000000..086e635ff8f --- /dev/null +++ b/ext/hash/sha3/generic64lc/SnP-Relaned.h @@ -0,0 +1,134 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _SnP_Relaned_h_ +#define _SnP_Relaned_h_ + +#define SnP_AddBytes(state, data, offset, length, SnP_AddLanes, SnP_AddBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_AddLanes(state, data, (length)/SnP_laneLengthInBytes); \ + SnP_AddBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + const unsigned char *_curData = (data); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_AddBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curData += _bytesInLane; \ + } \ + } \ + } + +#define SnP_OverwriteBytes(state, data, offset, length, SnP_OverwriteLanes, SnP_OverwriteBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_OverwriteLanes(state, data, (length)/SnP_laneLengthInBytes); \ + SnP_OverwriteBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + const unsigned char *_curData = (data); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_OverwriteBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curData += _bytesInLane; \ + } \ + } \ + } + +#define SnP_ExtractBytes(state, data, offset, length, SnP_ExtractLanes, SnP_ExtractBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_ExtractLanes(state, data, (length)/SnP_laneLengthInBytes); \ + SnP_ExtractBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (data)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + unsigned char *_curData = (data); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_ExtractBytesInLane(state, _lanePosition, _curData, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curData += _bytesInLane; \ + } \ + } \ + } + +#define SnP_ExtractAndAddBytes(state, input, output, offset, length, SnP_ExtractAndAddLanes, SnP_ExtractAndAddBytesInLane, SnP_laneLengthInBytes) \ + { \ + if ((offset) == 0) { \ + SnP_ExtractAndAddLanes(state, input, output, (length)/SnP_laneLengthInBytes); \ + SnP_ExtractAndAddBytesInLane(state, \ + (length)/SnP_laneLengthInBytes, \ + (input)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + (output)+((length)/SnP_laneLengthInBytes)*SnP_laneLengthInBytes, \ + 0, \ + (length)%SnP_laneLengthInBytes); \ + } \ + else { \ + unsigned int _sizeLeft = (length); \ + unsigned int _lanePosition = (offset)/SnP_laneLengthInBytes; \ + unsigned int _offsetInLane = (offset)%SnP_laneLengthInBytes; \ + const unsigned char *_curInput = (input); \ + unsigned char *_curOutput = (output); \ + while(_sizeLeft > 0) { \ + unsigned int _bytesInLane = SnP_laneLengthInBytes - _offsetInLane; \ + if (_bytesInLane > _sizeLeft) \ + _bytesInLane = _sizeLeft; \ + SnP_ExtractAndAddBytesInLane(state, _lanePosition, _curInput, _curOutput, _offsetInLane, _bytesInLane); \ + _sizeLeft -= _bytesInLane; \ + _lanePosition++; \ + _offsetInLane = 0; \ + _curInput += _bytesInLane; \ + _curOutput += _bytesInLane; \ + } \ + } \ + } + +#endif diff --git a/ext/hash/sha3/generic64lc/align.h b/ext/hash/sha3/generic64lc/align.h new file mode 100644 index 00000000000..e29771ed38d --- /dev/null +++ b/ext/hash/sha3/generic64lc/align.h @@ -0,0 +1,34 @@ +/* +Implementation by the Keccak, Keyak and Ketje Teams, namely, Guido Bertoni, +Joan Daemen, Michaël Peeters, Gilles Van Assche and Ronny Van Keer, hereby +denoted as "the implementer". + +For more information, feedback or questions, please refer to our websites: +http://keccak.noekeon.org/ +http://keyak.noekeon.org/ +http://ketje.noekeon.org/ + +To the extent possible under law, the implementer has waived all copyright +and related or neighboring rights to the source code in this file. +http://creativecommons.org/publicdomain/zero/1.0/ +*/ + +#ifndef _align_h_ +#define _align_h_ + +/* on Mac OS-X and possibly others, ALIGN(x) is defined in param.h, and -Werror chokes on the redef. */ +#ifdef ALIGN +#undef ALIGN +#endif + +#if defined(__GNUC__) +#define ALIGN(x) __attribute__ ((aligned(x))) +#elif defined(_MSC_VER) +#define ALIGN(x) __declspec(align(x)) +#elif defined(__ARMCC_VERSION) +#define ALIGN(x) __align(x) +#else +#define ALIGN(x) +#endif + +#endif diff --git a/ext/hash/sha3/generic64lc/brg_endian.h b/ext/hash/sha3/generic64lc/brg_endian.h new file mode 100644 index 00000000000..7226eb3bec5 --- /dev/null +++ b/ext/hash/sha3/generic64lc/brg_endian.h @@ -0,0 +1,142 @@ +/* + --------------------------------------------------------------------------- + Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved. + + LICENSE TERMS + + The redistribution and use of this software (with or without changes) + is allowed without the payment of fees or royalties provided that: + + 1. source code distributions include the above copyright notice, this + list of conditions and the following disclaimer; + + 2. binary distributions include the above copyright notice, this list + of conditions and the following disclaimer in their documentation; + + 3. the name of the copyright holder is not used to endorse products + built using this software without specific written permission. + + DISCLAIMER + + This software is provided 'as is' with no explicit or implied warranties + in respect of its properties, including, but not limited to, correctness + and/or fitness for purpose. + --------------------------------------------------------------------------- + Issue Date: 20/12/2007 + Changes for ARM 9/9/2010 +*/ + +#ifndef _BRG_ENDIAN_H +#define _BRG_ENDIAN_H + +#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */ +#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */ + +#if 0 +/* Include files where endian defines and byteswap functions may reside */ +#if defined( __sun ) +# include +#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ ) +# include +#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \ + defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ ) +# include +#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ ) +# if !defined( __MINGW32__ ) && !defined( _AIX ) +# include +# if !defined( __BEOS__ ) +# include +# endif +# endif +#endif +#endif + +/* Now attempt to set the define for platform byte order using any */ +/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */ +/* seem to encompass most endian symbol definitions */ + +#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN ) +# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN ) +# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( _BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( _LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN ) +# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ ) +# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif defined( __BIG_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#elif defined( __LITTLE_ENDIAN__ ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#endif + +/* if the platform byte order could not be determined, then try to */ +/* set this define using common machine defines */ +#if !defined(PLATFORM_BYTE_ORDER) + +#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \ + defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \ + defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \ + defined( vax ) || defined( vms ) || defined( VMS ) || \ + defined( __VMS ) || defined( _M_X64 ) +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN + +#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \ + defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \ + defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \ + defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \ + defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \ + defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \ + defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX ) +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN + +#elif defined(__arm__) +# ifdef __BIG_ENDIAN +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +# else +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +# endif +#elif 1 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN +#elif 0 /* **** EDIT HERE IF NECESSARY **** */ +# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN +#else +# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order +#endif + +#endif + +#endif