crypto: mips/poly1305 - incorporate OpenSSL/CRYPTOGAMS optimized implementation

This is a straight import of the OpenSSL/CRYPTOGAMS Poly1305 implementation for
MIPS authored by Andy Polyakov, a prior 64-bit only version of which has been
contributed by him to the OpenSSL project. The file 'poly1305-mips.pl' is taken
straight from this upstream GitHub repository [0] at commit
d22ade312a7af958ec955620b0d241cf42c37feb, and already contains all the changes
required to build it as part of a Linux kernel module.

[0] https://github.com/dot-asm/cryptogams

Co-developed-by: Andy Polyakov <appro@cryptogams.org>
Signed-off-by: Andy Polyakov <appro@cryptogams.org>
Co-developed-by: René van Dorst <opensource@vdorst.com>
Signed-off-by: René van Dorst <opensource@vdorst.com>
Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
Ard Biesheuvel 2019-11-08 13:22:26 +01:00 committed by Herbert Xu
parent a6b803b3dd
commit a11d055e7a
5 changed files with 1496 additions and 0 deletions

View File

@ -8,3 +8,17 @@ obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o
obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o
chacha-mips-y := chacha-core.o chacha-glue.o
AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots
# Poly1305 for MIPS: a composite object made of poly1305-core.o (assembled
# from the generated poly1305-core.S below) and poly1305-glue.o (presumably
# the C glue code).
obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o
poly1305-mips-y := poly1305-core.o poly1305-glue.o
# Flavour argument handed to the perl script: "o32" when CONFIG_CPU_MIPS32 is
# set, "64" when CONFIG_CPU_MIPS64 is set.
perlasm-flavour-$(CONFIG_CPU_MIPS32) := o32
perlasm-flavour-$(CONFIG_CPU_MIPS64) := 64
# perlasm command: run the prerequisite script with the selected flavour and
# the target path as its arguments.
quiet_cmd_perlasm = PERLASM $@
cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@)
# Generate poly1305-core.S from poly1305-mips.pl via the perlasm command.
$(obj)/poly1305-core.S: $(src)/poly1305-mips.pl FORCE
$(call if_changed,perlasm)
# Register the generated file as a kbuild target (required by if_changed).
targets += poly1305-core.S

View File

@ -0,0 +1,203 @@
// SPDX-License-Identifier: GPL-2.0
/*
* OpenSSL/Cryptogams accelerated Poly1305 transform for MIPS
*
* Copyright (C) 2019 Linaro Ltd. <ard.biesheuvel@linaro.org>
*/
#include <asm/unaligned.h>
#include <crypto/algapi.h>
#include <crypto/internal/hash.h>
#include <crypto/internal/poly1305.h>
#include <linux/cpufeature.h>
#include <linux/crypto.h>
#include <linux/module.h>
/*
 * Low-level Poly1305 primitives defined outside this file (presumably in the
 * perlasm-generated poly1305-core.S -- confirm against the build rules).
 *
 * As used below:
 *  - poly1305_init_mips() is handed &dctx->h and a pointer to key bytes;
 *  - poly1305_blocks_mips() is handed &dctx->h, input data, a byte count that
 *    is always a multiple of POLY1305_BLOCK_SIZE, and hibit (1 for ordinary
 *    blocks, 0 for the padded final block);
 *  - poly1305_emit_mips() is handed &dctx->h, a 4-word little-endian output
 *    buffer and dctx->s as the nonce.
 */
asmlinkage void poly1305_init_mips(void *state, const u8 *key);
asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit);
asmlinkage void poly1305_emit_mips(void *state, __le32 *digest, const u32 *nonce);
/*
 * poly1305_init_arch - start a new MAC computation (library interface).
 * @dctx: context to initialise
 * @key:  key material; bytes 0..31 are consumed
 *
 * The start of @key is passed to the low-level init routine, bytes 16..31 are
 * loaded as four little-endian words into dctx->s, and the partial-block
 * buffer is marked empty.
 */
void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
{
	const u8 *nonce = key + 16;
	unsigned int i;

	poly1305_init_mips(&dctx->h, key);

	for (i = 0; i < 4; i++)
		dctx->s[i] = get_unaligned_le32(nonce + 4 * i);

	dctx->buflen = 0;
}
EXPORT_SYMBOL(poly1305_init_arch);
/*
 * shash .init hook: reset the per-request state.
 *
 * Neither key half has been seen yet (rset/sset cleared) and no partial block
 * is buffered. Always returns 0.
 */
static int mips_poly1305_init(struct shash_desc *desc)
{
	struct poly1305_desc_ctx *state = shash_desc_ctx(desc);

	state->rset = 0;
	state->sset = false;
	state->buflen = 0;

	return 0;
}
/*
 * Peel key material off the front of the input while the key is incomplete.
 *
 * The first block seen initialises the accumulator state (r); the next block
 * is stored as the nonce (s). *src and *len are advanced past whatever was
 * consumed. Returns nonzero when at least one full block of data remains.
 */
static int mips_poly1305_take_key(struct poly1305_desc_ctx *dctx,
				  const u8 **src, u32 *len)
{
	unsigned int i;

	if (!dctx->rset) {
		poly1305_init_mips(&dctx->h, *src);
		dctx->rset = 1;
		*src += POLY1305_BLOCK_SIZE;
		*len -= POLY1305_BLOCK_SIZE;
	}

	if (*len >= POLY1305_BLOCK_SIZE) {
		for (i = 0; i < 4; i++)
			dctx->s[i] = get_unaligned_le32(*src + 4 * i);
		dctx->sset = true;
		*src += POLY1305_BLOCK_SIZE;
		*len -= POLY1305_BLOCK_SIZE;
	}

	return *len >= POLY1305_BLOCK_SIZE;
}

/*
 * Feed whole blocks from @src into the hash state, first diverting leading
 * blocks into the key if it has not been fully set yet.
 *
 * @len is expected to be at least one block; any trailing partial block is
 * ignored here (it is masked off before the low-level call).
 */
static void mips_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src,
				 u32 len, u32 hibit)
{
	if (unlikely(!dctx->sset) && !mips_poly1305_take_key(dctx, &src, &len))
		return;

	poly1305_blocks_mips(&dctx->h, src,
			     len & ~(POLY1305_BLOCK_SIZE - 1), hibit);
}
/*
 * shash .update hook: absorb @len bytes from @src.
 *
 * Three phases: top up a previously buffered partial block, process all
 * complete blocks directly from @src, then stash any remainder in dctx->buf
 * for a later call. Always returns 0.
 */
static int mips_poly1305_update(struct shash_desc *desc, const u8 *src,
				unsigned int len)
{
	struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc);

	if (unlikely(dctx->buflen)) {
		u32 room = POLY1305_BLOCK_SIZE - dctx->buflen;
		u32 take = min(len, room);

		memcpy(&dctx->buf[dctx->buflen], src, take);
		dctx->buflen += take;
		src += take;
		len -= take;

		/* Buffer filled up: hash it and start over. */
		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			mips_poly1305_blocks(dctx, dctx->buf,
					     POLY1305_BLOCK_SIZE, 1);
			dctx->buflen = 0;
		}
	}

	if (likely(len >= POLY1305_BLOCK_SIZE)) {
		unsigned int tail = len % POLY1305_BLOCK_SIZE;

		/* The callee ignores the trailing partial block itself. */
		mips_poly1305_blocks(dctx, src, len, 1);
		src += len - tail;
		len = tail;
	}

	if (unlikely(len)) {
		memcpy(dctx->buf, src, len);
		dctx->buflen = len;
	}

	return 0;
}
/*
 * poly1305_update_arch - absorb message data (library interface).
 * @dctx:   context previously set up by poly1305_init_arch()
 * @src:    input bytes
 * @nbytes: number of input bytes
 *
 * Completes a buffered partial block first, then hashes every whole block
 * straight from @src, and finally buffers whatever is left over.
 */
void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src,
			  unsigned int nbytes)
{
	if (unlikely(dctx->buflen)) {
		u32 room = POLY1305_BLOCK_SIZE - dctx->buflen;
		u32 take = min(nbytes, room);

		memcpy(&dctx->buf[dctx->buflen], src, take);
		dctx->buflen += take;
		src += take;
		nbytes -= take;

		/* Buffer filled up: hash it and start over. */
		if (dctx->buflen == POLY1305_BLOCK_SIZE) {
			poly1305_blocks_mips(&dctx->h, dctx->buf,
					     POLY1305_BLOCK_SIZE, 1);
			dctx->buflen = 0;
		}
	}

	if (likely(nbytes >= POLY1305_BLOCK_SIZE)) {
		unsigned int tail = nbytes % POLY1305_BLOCK_SIZE;
		unsigned int whole = nbytes - tail;

		poly1305_blocks_mips(&dctx->h, src, whole, 1);
		src += whole;
		nbytes = tail;
	}

	if (unlikely(nbytes)) {
		memcpy(dctx->buf, src, nbytes);
		dctx->buflen = nbytes;
	}
}
EXPORT_SYMBOL(poly1305_update_arch);
/*
 * poly1305_final_arch - finish the MAC and write the 16-byte tag.
 * @dctx: context holding the accumulated state; wiped on return
 * @dst:  output buffer for the tag (16 bytes, no alignment requirement)
 *
 * A buffered partial block is padded with a single 0x01 byte followed by
 * zeroes and hashed with hibit == 0. The low-level emit routine is then given
 * the nonce (dctx->s) and produces four little-endian words, which are copied
 * to @dst unchanged.
 */
void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst)
{
	__le32 digest[4];
	unsigned int i;

	if (unlikely(dctx->buflen)) {
		dctx->buf[dctx->buflen++] = 1;
		memset(dctx->buf + dctx->buflen, 0,
		       POLY1305_BLOCK_SIZE - dctx->buflen);
		poly1305_blocks_mips(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0);
	}

	poly1305_emit_mips(&dctx->h, digest, dctx->s);

	/*
	 * No carry propagation is needed here: nothing is added to the words
	 * returned by the emit routine, so each one is stored as-is (only the
	 * unaligned little-endian store is required for @dst).
	 */
	for (i = 0; i < 4; i++)
		put_unaligned_le32(le32_to_cpu(digest[i]), dst + 4 * i);

	/* Do not leave key-dependent state behind in the context. */
	*dctx = (struct poly1305_desc_ctx){};
}
EXPORT_SYMBOL(poly1305_final_arch);
/*
 * shash .final hook: produce the tag in @dst.
 *
 * Returns -ENOKEY when the nonce half of the key was never supplied through
 * the update path (sset still false); otherwise finalises via
 * poly1305_final_arch() and returns 0.
 */
static int mips_poly1305_final(struct shash_desc *desc, u8 *dst)
{
	struct poly1305_desc_ctx *state = shash_desc_ctx(desc);

	if (unlikely(!state->sset))
		return -ENOKEY;

	poly1305_final_arch(state, dst);

	return 0;
}
static struct shash_alg mips_poly1305_alg = {
.init = mips_poly1305_init,
.update = mips_poly1305_update,
.final = mips_poly1305_final,
.digestsize = POLY1305_DIGEST_SIZE,
.descsize = sizeof(struct poly1305_desc_ctx),
.base.cra_name = "poly1305",
.base.cra_driver_name = "poly1305-mips",
.base.cra_priority = 200,
.base.cra_blocksize = POLY1305_BLOCK_SIZE,
.base.cra_module = THIS_MODULE,
};
/*
 * Module init: register the shash algorithm.
 *
 * The exported poly1305_*_arch() library functions do not depend on the
 * crypto hash API, and the Kconfig entry for this driver does not select
 * CRYPTO_HASH. Registration is therefore only attempted when the shash API
 * is reachable from this object; otherwise the module loads as a pure
 * library provider.
 */
static int __init mips_poly1305_mod_init(void)
{
	if (!IS_REACHABLE(CONFIG_CRYPTO_HASH))
		return 0;

	return crypto_register_shash(&mips_poly1305_alg);
}

/* Module exit: undo the registration performed by mips_poly1305_mod_init(). */
static void __exit mips_poly1305_mod_exit(void)
{
	if (IS_REACHABLE(CONFIG_CRYPTO_HASH))
		crypto_unregister_shash(&mips_poly1305_alg);
}

module_init(mips_poly1305_mod_init);
module_exit(mips_poly1305_mod_exit);

MODULE_DESCRIPTION("Poly1305 transform (MIPS accelerated)");
MODULE_LICENSE("GPL v2");
MODULE_ALIAS_CRYPTO("poly1305");
MODULE_ALIAS_CRYPTO("poly1305-mips");

File diff suppressed because it is too large Load Diff

View File

@ -724,6 +724,11 @@ config CRYPTO_POLY1305_X86_64
in IETF protocols. This is the x86_64 assembler implementation using SIMD
instructions.
# MIPS-optimized Poly1305 driver. Available when CPU_MIPS32 is set, or when
# CPU_MIPS64 is set together with 64BIT. Enabling it also selects
# CRYPTO_ARCH_HAVE_LIB_POLY1305.
config CRYPTO_POLY1305_MIPS
tristate "Poly1305 authenticator algorithm (MIPS optimized)"
depends on CPU_MIPS32 || (CPU_MIPS64 && 64BIT)
select CRYPTO_ARCH_HAVE_LIB_POLY1305
config CRYPTO_MD4
tristate "MD4 digest algorithm"
select CRYPTO_HASH

View File

@ -39,6 +39,7 @@ config CRYPTO_LIB_DES
config CRYPTO_LIB_POLY1305_RSIZE
int
default 2 if MIPS
default 4 if X86_64
default 9 if ARM || ARM64
default 1