mirror of
https://github.com/edk2-porting/linux-next.git
synced 2024-12-15 00:34:10 +08:00
crypto: arm64/sm4 - add CE implementation for cmac/xcbc/cbcmac
This patch adds a CE-optimized assembly implementation for cmac/xcbc/cbcmac.

Benchmarked on a T-Head Yitian-710 at 2.75 GHz. The data comes from mode 300 of tcrypt and compares the performance before and after this patch (the driver used before this patch is XXXmac(sm4-ce)). The columns are blocks of different lengths (update sizes). The data is tabulated and the unit is Mb/s:

Before:

update-size    |      16      64     256    1024    2048    4096    8192
---------------+--------------------------------------------------------
cmac(sm4-ce)   |  293.33  403.69  503.76  527.78  531.10  535.46  535.81
xcbc(sm4-ce)   |  292.83  402.50  504.02  529.08  529.87  536.55  538.24
cbcmac(sm4-ce) |  318.42  415.79  497.12  515.05  523.15  521.19  523.01

After:

update-size    |      16      64     256    1024    2048    4096    8192
---------------+--------------------------------------------------------
cmac-sm4-ce    |  371.99  675.28  903.56  971.65  980.57  990.40  991.04
xcbc-sm4-ce    |  372.11  674.55  903.47  971.61  980.96  990.42  991.10
cbcmac-sm4-ce  |  371.63  675.33  903.23  972.07  981.42  990.93  991.45

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
This commit is contained in:
parent
01f633113b
commit
6b5360a5e0
@ -35,6 +35,7 @@
|
||||
#define RTMP3 v19
|
||||
|
||||
#define RIV v20
|
||||
#define RMAC v20
|
||||
#define RMASK v21
|
||||
|
||||
|
||||
@ -1007,6 +1008,75 @@ SYM_FUNC_START(sm4_ce_xts_dec)
|
||||
ret
|
||||
SYM_FUNC_END(sm4_ce_xts_dec)
|
||||
|
||||
.align 3
SYM_FUNC_START(sm4_ce_mac_update)
	/*
	 * Chained MAC update: the running digest is XORed with each source
	 * block and passed through SM4_CRYPT_BLK.
	 *
	 * input:
	 *   x0: round key array, CTX
	 *   x1: digest (loaded on entry, stored back on exit)
	 *   x2: src
	 *   w3: nblocks
	 *   w4: enc_before - run SM4_CRYPT_BLK on the digest before any
	 *                    source block is absorbed
	 *   w5: enc_after  - when zero, the last source block is only XORed
	 *                    into the digest and left unencrypted
	 */
	SM4_PREPARE(x0)

	ld1		{RMAC.16b}, [x1]

	/* enc_before == 0: skip the up-front cipher pass on the digest */
	cbz		w4, .Lmac_update

	SM4_CRYPT_BLK(RMAC)

.Lmac_update:
	/* nothing to absorb: just write the digest back */
	cbz		w3, .Lmac_ret

	/*
	 * w3 = number of blocks that get the full XOR + cipher treatment:
	 * nblocks when enc_after is set, nblocks - 1 otherwise.
	 */
	sub		w6, w3, #1
	cmp		w5, wzr
	csel		w3, w3, w6, ne

	cbz		w3, .Lmac_end

.Lmac_loop_4x:
	/* fewer than four blocks left: finish them one at a time */
	cmp		w3, #4
	blt		.Lmac_loop_1x

	sub		w3, w3, #4

	ld1		{v0.16b-v3.16b}, [x2], #64

	/* the chain is serial: each block depends on the previous result */
	eor		RMAC.16b, RMAC.16b, v0.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v1.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v2.16b
	SM4_CRYPT_BLK(RMAC)
	eor		RMAC.16b, RMAC.16b, v3.16b
	SM4_CRYPT_BLK(RMAC)

	cbz		w3, .Lmac_end
	b		.Lmac_loop_4x

.Lmac_loop_1x:
	sub		w3, w3, #1

	ld1		{v0.16b}, [x2], #16

	eor		RMAC.16b, RMAC.16b, v0.16b
	SM4_CRYPT_BLK(RMAC)

	cbnz		w3, .Lmac_loop_1x

.Lmac_end:
	/* enc_after set: every block has already been fully processed */
	cbnz		w5, .Lmac_ret

	/* enc_after clear: XOR in the final block without the cipher pass */
	ld1		{v0.16b}, [x2], #16
	eor		RMAC.16b, RMAC.16b, v0.16b

.Lmac_ret:
	st1		{RMAC.16b}, [x1]
	ret
SYM_FUNC_END(sm4_ce_mac_update)
|
||||
|
||||
|
||||
.section ".rodata", "a"
|
||||
.align 4
|
||||
|
@ -14,8 +14,10 @@
|
||||
#include <linux/cpufeature.h>
|
||||
#include <asm/neon.h>
|
||||
#include <asm/simd.h>
|
||||
#include <crypto/b128ops.h>
|
||||
#include <crypto/internal/simd.h>
|
||||
#include <crypto/internal/skcipher.h>
|
||||
#include <crypto/internal/hash.h>
|
||||
#include <crypto/scatterwalk.h>
|
||||
#include <crypto/xts.h>
|
||||
#include <crypto/sm4.h>
|
||||
@ -47,6 +49,9 @@ asmlinkage void sm4_ce_xts_enc(const u32 *rkey1, u8 *dst, const u8 *src,
|
||||
asmlinkage void sm4_ce_xts_dec(const u32 *rkey1, u8 *dst, const u8 *src,
|
||||
u8 *tweak, unsigned int nbytes,
|
||||
const u32 *rkey2_enc);
|
||||
/*
 * Assembly helper shared by the cmac/xcbc/cbcmac implementations.
 *
 * @rkey_enc:   round key array passed to the cipher core
 * @digest:     running MAC state, read on entry and written back on exit
 * @src:        source data, @nblocks blocks long
 * @nblocks:    number of blocks to absorb from @src
 * @enc_before: run the cipher on @digest before absorbing any block
 * @enc_after:  when false, the last block is only XORed into @digest
 *              and not run through the cipher
 */
asmlinkage void sm4_ce_mac_update(const u32 *rkey_enc,
				  u8 *digest,
				  const u8 *src,
				  unsigned int nblocks,
				  bool enc_before,
				  bool enc_after);
|
||||
|
||||
EXPORT_SYMBOL(sm4_ce_expand_key);
|
||||
EXPORT_SYMBOL(sm4_ce_crypt_block);
|
||||
@ -58,6 +63,16 @@ struct sm4_xts_ctx {
|
||||
struct sm4_ctx key2;
|
||||
};
|
||||
|
||||
struct sm4_mac_tfm_ctx {
|
||||
struct sm4_ctx key;
|
||||
u8 __aligned(8) consts[];
|
||||
};
|
||||
|
||||
struct sm4_mac_desc_ctx {
|
||||
unsigned int len;
|
||||
u8 digest[SM4_BLOCK_SIZE];
|
||||
};
|
||||
|
||||
static int sm4_setkey(struct crypto_skcipher *tfm, const u8 *key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
@ -594,13 +609,260 @@ static struct skcipher_alg sm4_algs[] = {
|
||||
}
|
||||
};
|
||||
|
||||
static int sm4_cbcmac_setkey(struct crypto_shash *tfm, const u8 *key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct sm4_mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
|
||||
|
||||
if (key_len != SM4_KEY_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
kernel_neon_begin();
|
||||
sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
|
||||
crypto_sm4_fk, crypto_sm4_ck);
|
||||
kernel_neon_end();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sm4_cmac_setkey(struct crypto_shash *tfm, const u8 *key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct sm4_mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
|
||||
be128 *consts = (be128 *)ctx->consts;
|
||||
u64 a, b;
|
||||
|
||||
if (key_len != SM4_KEY_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
memset(consts, 0, SM4_BLOCK_SIZE);
|
||||
|
||||
kernel_neon_begin();
|
||||
|
||||
sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
|
||||
crypto_sm4_fk, crypto_sm4_ck);
|
||||
|
||||
/* encrypt the zero block */
|
||||
sm4_ce_crypt_block(ctx->key.rkey_enc, (u8 *)consts, (const u8 *)consts);
|
||||
|
||||
kernel_neon_end();
|
||||
|
||||
/* gf(2^128) multiply zero-ciphertext with u and u^2 */
|
||||
a = be64_to_cpu(consts[0].a);
|
||||
b = be64_to_cpu(consts[0].b);
|
||||
consts[0].a = cpu_to_be64((a << 1) | (b >> 63));
|
||||
consts[0].b = cpu_to_be64((b << 1) ^ ((a >> 63) ? 0x87 : 0));
|
||||
|
||||
a = be64_to_cpu(consts[0].a);
|
||||
b = be64_to_cpu(consts[0].b);
|
||||
consts[1].a = cpu_to_be64((a << 1) | (b >> 63));
|
||||
consts[1].b = cpu_to_be64((b << 1) ^ ((a >> 63) ? 0x87 : 0));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sm4_xcbc_setkey(struct crypto_shash *tfm, const u8 *key,
|
||||
unsigned int key_len)
|
||||
{
|
||||
struct sm4_mac_tfm_ctx *ctx = crypto_shash_ctx(tfm);
|
||||
u8 __aligned(8) key2[SM4_BLOCK_SIZE];
|
||||
static u8 const ks[3][SM4_BLOCK_SIZE] = {
|
||||
{ [0 ... SM4_BLOCK_SIZE - 1] = 0x1},
|
||||
{ [0 ... SM4_BLOCK_SIZE - 1] = 0x2},
|
||||
{ [0 ... SM4_BLOCK_SIZE - 1] = 0x3},
|
||||
};
|
||||
|
||||
if (key_len != SM4_KEY_SIZE)
|
||||
return -EINVAL;
|
||||
|
||||
kernel_neon_begin();
|
||||
|
||||
sm4_ce_expand_key(key, ctx->key.rkey_enc, ctx->key.rkey_dec,
|
||||
crypto_sm4_fk, crypto_sm4_ck);
|
||||
|
||||
sm4_ce_crypt_block(ctx->key.rkey_enc, key2, ks[0]);
|
||||
sm4_ce_crypt(ctx->key.rkey_enc, ctx->consts, ks[1], 2);
|
||||
|
||||
sm4_ce_expand_key(key2, ctx->key.rkey_enc, ctx->key.rkey_dec,
|
||||
crypto_sm4_fk, crypto_sm4_ck);
|
||||
|
||||
kernel_neon_end();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sm4_mac_init(struct shash_desc *desc)
|
||||
{
|
||||
struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
|
||||
memset(ctx->digest, 0, SM4_BLOCK_SIZE);
|
||||
ctx->len = 0;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int sm4_mac_update(struct shash_desc *desc, const u8 *p,
|
||||
unsigned int len)
|
||||
{
|
||||
struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
|
||||
struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc);
|
||||
unsigned int l, nblocks;
|
||||
|
||||
if (len == 0)
|
||||
return 0;
|
||||
|
||||
if (ctx->len || ctx->len + len < SM4_BLOCK_SIZE) {
|
||||
l = min(len, SM4_BLOCK_SIZE - ctx->len);
|
||||
|
||||
crypto_xor(ctx->digest + ctx->len, p, l);
|
||||
ctx->len += l;
|
||||
len -= l;
|
||||
p += l;
|
||||
}
|
||||
|
||||
if (len && (ctx->len % SM4_BLOCK_SIZE) == 0) {
|
||||
kernel_neon_begin();
|
||||
|
||||
if (len < SM4_BLOCK_SIZE && ctx->len == SM4_BLOCK_SIZE) {
|
||||
sm4_ce_crypt_block(tctx->key.rkey_enc,
|
||||
ctx->digest, ctx->digest);
|
||||
ctx->len = 0;
|
||||
} else {
|
||||
nblocks = len / SM4_BLOCK_SIZE;
|
||||
len %= SM4_BLOCK_SIZE;
|
||||
|
||||
sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, p,
|
||||
nblocks, (ctx->len == SM4_BLOCK_SIZE),
|
||||
(len != 0));
|
||||
|
||||
p += nblocks * SM4_BLOCK_SIZE;
|
||||
|
||||
if (len == 0)
|
||||
ctx->len = SM4_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
kernel_neon_end();
|
||||
|
||||
if (len) {
|
||||
crypto_xor(ctx->digest, p, len);
|
||||
ctx->len = len;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Finish a cmac(sm4) or xcbc(sm4) computation and copy the tag to @out.
 *
 * If the last block is complete (ctx->len == SM4_BLOCK_SIZE) the first
 * constant is mixed in.  Otherwise a 0x80 padding byte is XORed in at the
 * current offset and the second constant is used.  The constant is XORed
 * into the digest and the result encrypted once.
 *
 * Always returns 0.
 */
static int sm4_cmac_final(struct shash_desc *desc, u8 *out)
{
	struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
	struct sm4_mac_desc_ctx *ctx = shash_desc_ctx(desc);
	bool partial = ctx->len != SM4_BLOCK_SIZE;
	const u8 *subkey = tctx->consts;

	if (partial) {
		ctx->digest[ctx->len] ^= 0x80;
		subkey += SM4_BLOCK_SIZE;
	}

	kernel_neon_begin();
	/* one block, no cipher pass up front, encrypt after the XOR */
	sm4_ce_mac_update(tctx->key.rkey_enc, ctx->digest, subkey, 1,
			  false, true);
	kernel_neon_end();

	memcpy(out, ctx->digest, SM4_BLOCK_SIZE);

	return 0;
}
|
||||
|
||||
/*
 * Finish a cbcmac(sm4) computation and copy the tag to @out.
 *
 * If any bytes are still pending (ctx->len != 0) the digest is encrypted
 * one last time; with nothing pending the digest is returned as it is.
 *
 * Always returns 0.
 */
static int sm4_cbcmac_final(struct shash_desc *desc, u8 *out)
{
	struct sm4_mac_tfm_ctx *tctx = crypto_shash_ctx(desc->tfm);
	struct sm4_mac_desc_ctx *mctx = shash_desc_ctx(desc);

	if (mctx->len != 0) {
		kernel_neon_begin();
		sm4_ce_crypt_block(tctx->key.rkey_enc, mctx->digest,
				   mctx->digest);
		kernel_neon_end();
	}

	memcpy(out, mctx->digest, SM4_BLOCK_SIZE);

	return 0;
}
|
||||
|
||||
static struct shash_alg sm4_mac_algs[] = {
|
||||
{
|
||||
.base = {
|
||||
.cra_name = "cmac(sm4)",
|
||||
.cra_driver_name = "cmac-sm4-ce",
|
||||
.cra_priority = 400,
|
||||
.cra_blocksize = SM4_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct sm4_mac_tfm_ctx)
|
||||
+ SM4_BLOCK_SIZE * 2,
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.digestsize = SM4_BLOCK_SIZE,
|
||||
.init = sm4_mac_init,
|
||||
.update = sm4_mac_update,
|
||||
.final = sm4_cmac_final,
|
||||
.setkey = sm4_cmac_setkey,
|
||||
.descsize = sizeof(struct sm4_mac_desc_ctx),
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "xcbc(sm4)",
|
||||
.cra_driver_name = "xcbc-sm4-ce",
|
||||
.cra_priority = 400,
|
||||
.cra_blocksize = SM4_BLOCK_SIZE,
|
||||
.cra_ctxsize = sizeof(struct sm4_mac_tfm_ctx)
|
||||
+ SM4_BLOCK_SIZE * 2,
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.digestsize = SM4_BLOCK_SIZE,
|
||||
.init = sm4_mac_init,
|
||||
.update = sm4_mac_update,
|
||||
.final = sm4_cmac_final,
|
||||
.setkey = sm4_xcbc_setkey,
|
||||
.descsize = sizeof(struct sm4_mac_desc_ctx),
|
||||
}, {
|
||||
.base = {
|
||||
.cra_name = "cbcmac(sm4)",
|
||||
.cra_driver_name = "cbcmac-sm4-ce",
|
||||
.cra_priority = 400,
|
||||
.cra_blocksize = 1,
|
||||
.cra_ctxsize = sizeof(struct sm4_mac_tfm_ctx),
|
||||
.cra_module = THIS_MODULE,
|
||||
},
|
||||
.digestsize = SM4_BLOCK_SIZE,
|
||||
.init = sm4_mac_init,
|
||||
.update = sm4_mac_update,
|
||||
.final = sm4_cbcmac_final,
|
||||
.setkey = sm4_cbcmac_setkey,
|
||||
.descsize = sizeof(struct sm4_mac_desc_ctx),
|
||||
}
|
||||
};
|
||||
|
||||
/*
 * Module init: register the skcipher algorithms first, then the MAC
 * (cmac/xcbc/cbcmac) shash algorithms.  If the shash registration fails,
 * the skciphers registered just before are unregistered again so nothing
 * stays registered on the error path.
 *
 * Returns 0 on success, or the error code of the registration call that
 * failed.
 */
static int __init sm4_init(void)
{
	int err;

	err = crypto_register_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
	if (err)
		return err;

	err = crypto_register_shashes(sm4_mac_algs, ARRAY_SIZE(sm4_mac_algs));
	if (err)
		goto out_err;

	return 0;

out_err:
	/* roll back the first registration */
	crypto_unregister_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
	return err;
}
|
||||
|
||||
/*
 * Module exit: unregister everything sm4_init() registered, in reverse
 * order (MAC shashes first, then the skciphers).
 */
static void __exit sm4_exit(void)
{
	crypto_unregister_shashes(sm4_mac_algs, ARRAY_SIZE(sm4_mac_algs));
	crypto_unregister_skciphers(sm4_algs, ARRAY_SIZE(sm4_algs));
}
|
||||
|
||||
@ -616,5 +878,8 @@ MODULE_ALIAS_CRYPTO("cfb(sm4)");
|
||||
MODULE_ALIAS_CRYPTO("ctr(sm4)");
|
||||
MODULE_ALIAS_CRYPTO("cts(cbc(sm4))");
|
||||
MODULE_ALIAS_CRYPTO("xts(sm4)");
|
||||
MODULE_ALIAS_CRYPTO("cmac(sm4)");
|
||||
MODULE_ALIAS_CRYPTO("xcbc(sm4)");
|
||||
MODULE_ALIAS_CRYPTO("cbcmac(sm4)");
|
||||
MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>");
|
||||
MODULE_LICENSE("GPL v2");
|
||||
|
Loading…
Reference in New Issue
Block a user