Merge git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6

Pull crypto update from Herbert Xu:
 - Do not idle omap device between crypto operations in one session.
 - Added sha224/sha384 shims for SSSE3.
 - More optimisations for camellia-aesni-avx2.
 - Removed defunct blowfish/twofish AVX2 implementations.
 - Added unaligned buffer self-tests.
 - Added PCLMULQDQ optimisation for CRCT10DIF.
 - Added support for Freescale's DCP co-processor
 - Misc fixes.

* git://git.kernel.org/pub/scm/linux/kernel/git/herbert/crypto-2.6: (44 commits)
  crypto: testmgr - test hash implementations with unaligned buffers
  crypto: testmgr - test AEADs with unaligned buffers
  crypto: testmgr - test skciphers with unaligned buffers
  crypto: testmgr - check that entries in alg_test_descs are in correct order
  Revert "crypto: twofish - add AVX2/x86_64 assembler implementation of twofish cipher"
  Revert "crypto: blowfish - add AVX2/x86_64 implementation of blowfish cipher"
  crypto: camellia-aesni-avx2 - tune assembly code for more performance
  hwrng: bcm2835 - fix MODULE_LICENSE tag
  hwrng: nomadik - use clk_prepare_enable()
  crypto: picoxcell - replace strict_strtoul() with kstrtoul()
  crypto: dcp - Staticize local symbols
  crypto: dcp - Use NULL instead of 0
  crypto: dcp - Use devm_* APIs
  crypto: dcp - Remove redundant platform_set_drvdata()
  hwrng: use platform_{get,set}_drvdata()
  crypto: omap-aes - Don't idle/start AES device between Encrypt operations
  crypto: crct10dif - Use PTR_RET
  crypto: ux500 - Cocci spatch "resource_size.spatch"
  crypto: sha256_ssse3 - add sha224 support
  crypto: sha512_ssse3 - add sha384 support
  ...
This commit is contained in:
Linus Torvalds 2013-07-05 12:12:33 -07:00
commit b2c311075d
48 changed files with 2541 additions and 2563 deletions

View File

@ -736,7 +736,7 @@
dcp@80028000 {
reg = <0x80028000 0x2000>;
interrupts = <52 53 54>;
status = "disabled";
compatible = "fsl-dcp";
};
pxp@8002a000 {

View File

@ -3,8 +3,6 @@
#
avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no)
avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\
$(comma)4)$(comma)%ymm2,yes,no)
obj-$(CONFIG_CRYPTO_ABLK_HELPER_X86) += ablk_helper.o
obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o
@ -29,6 +27,7 @@ obj-$(CONFIG_CRYPTO_SHA1_SSSE3) += sha1-ssse3.o
obj-$(CONFIG_CRYPTO_CRC32_PCLMUL) += crc32-pclmul.o
obj-$(CONFIG_CRYPTO_SHA256_SSSE3) += sha256-ssse3.o
obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o
obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o
# These modules require assembler to support AVX.
ifeq ($(avx_supported),yes)
@ -42,10 +41,8 @@ endif
# These modules require assembler to support AVX2.
ifeq ($(avx2_supported),yes)
obj-$(CONFIG_CRYPTO_BLOWFISH_AVX2_X86_64) += blowfish-avx2.o
obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX2_X86_64) += camellia-aesni-avx2.o
obj-$(CONFIG_CRYPTO_SERPENT_AVX2_X86_64) += serpent-avx2.o
obj-$(CONFIG_CRYPTO_TWOFISH_AVX2_X86_64) += twofish-avx2.o
endif
aes-i586-y := aes-i586-asm_32.o aes_glue.o
@ -73,10 +70,8 @@ ifeq ($(avx_supported),yes)
endif
ifeq ($(avx2_supported),yes)
blowfish-avx2-y := blowfish-avx2-asm_64.o blowfish_avx2_glue.o
camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o
serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o
twofish-avx2-y := twofish-avx2-asm_64.o twofish_avx2_glue.o
endif
aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o
@ -87,3 +82,4 @@ crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o
crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o
sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o
sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o
crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o

View File

@ -1,449 +0,0 @@
/*
* x86_64/AVX2 assembler optimized version of Blowfish
*
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
*/
#include <linux/linkage.h>
.file "blowfish-avx2-asm_64.S"
.data
.align 32
.Lprefetch_mask:
.long 0*64
.long 1*64
.long 2*64
.long 3*64
.long 4*64
.long 5*64
.long 6*64
.long 7*64
.Lbswap32_mask:
.long 0x00010203
.long 0x04050607
.long 0x08090a0b
.long 0x0c0d0e0f
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.Lbswap_iv_mask:
.byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0
.text
/* structure of crypto context */
#define p 0
#define s0 ((16 + 2) * 4)
#define s1 ((16 + 2 + (1 * 256)) * 4)
#define s2 ((16 + 2 + (2 * 256)) * 4)
#define s3 ((16 + 2 + (3 * 256)) * 4)
/* register macros */
#define CTX %rdi
#define RIO %rdx
#define RS0 %rax
#define RS1 %r8
#define RS2 %r9
#define RS3 %r10
#define RLOOP %r11
#define RLOOPd %r11d
#define RXr0 %ymm8
#define RXr1 %ymm9
#define RXr2 %ymm10
#define RXr3 %ymm11
#define RXl0 %ymm12
#define RXl1 %ymm13
#define RXl2 %ymm14
#define RXl3 %ymm15
/* temp regs */
#define RT0 %ymm0
#define RT0x %xmm0
#define RT1 %ymm1
#define RT1x %xmm1
#define RIDX0 %ymm2
#define RIDX1 %ymm3
#define RIDX1x %xmm3
#define RIDX2 %ymm4
#define RIDX3 %ymm5
/* vpgatherdd mask and '-1' */
#define RNOT %ymm6
/* byte mask, (-1 >> 24) */
#define RBYTE %ymm7
/***********************************************************************
* 32-way AVX2 blowfish
***********************************************************************/
#define F(xl, xr) \
vpsrld $24, xl, RIDX0; \
vpsrld $16, xl, RIDX1; \
vpsrld $8, xl, RIDX2; \
vpand RBYTE, RIDX1, RIDX1; \
vpand RBYTE, RIDX2, RIDX2; \
vpand RBYTE, xl, RIDX3; \
\
vpgatherdd RNOT, (RS0, RIDX0, 4), RT0; \
vpcmpeqd RNOT, RNOT, RNOT; \
vpcmpeqd RIDX0, RIDX0, RIDX0; \
\
vpgatherdd RNOT, (RS1, RIDX1, 4), RT1; \
vpcmpeqd RIDX1, RIDX1, RIDX1; \
vpaddd RT0, RT1, RT0; \
\
vpgatherdd RIDX0, (RS2, RIDX2, 4), RT1; \
vpxor RT0, RT1, RT0; \
\
vpgatherdd RIDX1, (RS3, RIDX3, 4), RT1; \
vpcmpeqd RNOT, RNOT, RNOT; \
vpaddd RT0, RT1, RT0; \
\
vpxor RT0, xr, xr;
#define add_roundkey(xl, nmem) \
vpbroadcastd nmem, RT0; \
vpxor RT0, xl ## 0, xl ## 0; \
vpxor RT0, xl ## 1, xl ## 1; \
vpxor RT0, xl ## 2, xl ## 2; \
vpxor RT0, xl ## 3, xl ## 3;
#define round_enc() \
add_roundkey(RXr, p(CTX,RLOOP,4)); \
F(RXl0, RXr0); \
F(RXl1, RXr1); \
F(RXl2, RXr2); \
F(RXl3, RXr3); \
\
add_roundkey(RXl, p+4(CTX,RLOOP,4)); \
F(RXr0, RXl0); \
F(RXr1, RXl1); \
F(RXr2, RXl2); \
F(RXr3, RXl3);
#define round_dec() \
add_roundkey(RXr, p+4*2(CTX,RLOOP,4)); \
F(RXl0, RXr0); \
F(RXl1, RXr1); \
F(RXl2, RXr2); \
F(RXl3, RXr3); \
\
add_roundkey(RXl, p+4(CTX,RLOOP,4)); \
F(RXr0, RXl0); \
F(RXr1, RXl1); \
F(RXr2, RXl2); \
F(RXr3, RXl3);
#define init_round_constants() \
vpcmpeqd RNOT, RNOT, RNOT; \
leaq s0(CTX), RS0; \
leaq s1(CTX), RS1; \
leaq s2(CTX), RS2; \
leaq s3(CTX), RS3; \
vpsrld $24, RNOT, RBYTE;
#define transpose_2x2(x0, x1, t0) \
vpunpckldq x0, x1, t0; \
vpunpckhdq x0, x1, x1; \
\
vpunpcklqdq t0, x1, x0; \
vpunpckhqdq t0, x1, x1;
#define read_block(xl, xr) \
vbroadcasti128 .Lbswap32_mask, RT1; \
\
vpshufb RT1, xl ## 0, xl ## 0; \
vpshufb RT1, xr ## 0, xr ## 0; \
vpshufb RT1, xl ## 1, xl ## 1; \
vpshufb RT1, xr ## 1, xr ## 1; \
vpshufb RT1, xl ## 2, xl ## 2; \
vpshufb RT1, xr ## 2, xr ## 2; \
vpshufb RT1, xl ## 3, xl ## 3; \
vpshufb RT1, xr ## 3, xr ## 3; \
\
transpose_2x2(xl ## 0, xr ## 0, RT0); \
transpose_2x2(xl ## 1, xr ## 1, RT0); \
transpose_2x2(xl ## 2, xr ## 2, RT0); \
transpose_2x2(xl ## 3, xr ## 3, RT0);
#define write_block(xl, xr) \
vbroadcasti128 .Lbswap32_mask, RT1; \
\
transpose_2x2(xl ## 0, xr ## 0, RT0); \
transpose_2x2(xl ## 1, xr ## 1, RT0); \
transpose_2x2(xl ## 2, xr ## 2, RT0); \
transpose_2x2(xl ## 3, xr ## 3, RT0); \
\
vpshufb RT1, xl ## 0, xl ## 0; \
vpshufb RT1, xr ## 0, xr ## 0; \
vpshufb RT1, xl ## 1, xl ## 1; \
vpshufb RT1, xr ## 1, xr ## 1; \
vpshufb RT1, xl ## 2, xl ## 2; \
vpshufb RT1, xr ## 2, xr ## 2; \
vpshufb RT1, xl ## 3, xl ## 3; \
vpshufb RT1, xr ## 3, xr ## 3;
.align 8
__blowfish_enc_blk32:
/* input:
* %rdi: ctx, CTX
* RXl0..4, RXr0..4: plaintext
* output:
* RXl0..4, RXr0..4: ciphertext (RXl <=> RXr swapped)
*/
init_round_constants();
read_block(RXl, RXr);
movl $1, RLOOPd;
add_roundkey(RXl, p+4*(0)(CTX));
.align 4
.L__enc_loop:
round_enc();
leal 2(RLOOPd), RLOOPd;
cmpl $17, RLOOPd;
jne .L__enc_loop;
add_roundkey(RXr, p+4*(17)(CTX));
write_block(RXl, RXr);
ret;
ENDPROC(__blowfish_enc_blk32)
.align 8
__blowfish_dec_blk32:
/* input:
* %rdi: ctx, CTX
* RXl0..4, RXr0..4: ciphertext
* output:
* RXl0..4, RXr0..4: plaintext (RXl <=> RXr swapped)
*/
init_round_constants();
read_block(RXl, RXr);
movl $14, RLOOPd;
add_roundkey(RXl, p+4*(17)(CTX));
.align 4
.L__dec_loop:
round_dec();
addl $-2, RLOOPd;
jns .L__dec_loop;
add_roundkey(RXr, p+4*(0)(CTX));
write_block(RXl, RXr);
ret;
ENDPROC(__blowfish_dec_blk32)
ENTRY(blowfish_ecb_enc_32way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
vzeroupper;
vmovdqu 0*32(%rdx), RXl0;
vmovdqu 1*32(%rdx), RXr0;
vmovdqu 2*32(%rdx), RXl1;
vmovdqu 3*32(%rdx), RXr1;
vmovdqu 4*32(%rdx), RXl2;
vmovdqu 5*32(%rdx), RXr2;
vmovdqu 6*32(%rdx), RXl3;
vmovdqu 7*32(%rdx), RXr3;
call __blowfish_enc_blk32;
vmovdqu RXr0, 0*32(%rsi);
vmovdqu RXl0, 1*32(%rsi);
vmovdqu RXr1, 2*32(%rsi);
vmovdqu RXl1, 3*32(%rsi);
vmovdqu RXr2, 4*32(%rsi);
vmovdqu RXl2, 5*32(%rsi);
vmovdqu RXr3, 6*32(%rsi);
vmovdqu RXl3, 7*32(%rsi);
vzeroupper;
ret;
ENDPROC(blowfish_ecb_enc_32way)
ENTRY(blowfish_ecb_dec_32way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
vzeroupper;
vmovdqu 0*32(%rdx), RXl0;
vmovdqu 1*32(%rdx), RXr0;
vmovdqu 2*32(%rdx), RXl1;
vmovdqu 3*32(%rdx), RXr1;
vmovdqu 4*32(%rdx), RXl2;
vmovdqu 5*32(%rdx), RXr2;
vmovdqu 6*32(%rdx), RXl3;
vmovdqu 7*32(%rdx), RXr3;
call __blowfish_dec_blk32;
vmovdqu RXr0, 0*32(%rsi);
vmovdqu RXl0, 1*32(%rsi);
vmovdqu RXr1, 2*32(%rsi);
vmovdqu RXl1, 3*32(%rsi);
vmovdqu RXr2, 4*32(%rsi);
vmovdqu RXl2, 5*32(%rsi);
vmovdqu RXr3, 6*32(%rsi);
vmovdqu RXl3, 7*32(%rsi);
vzeroupper;
ret;
ENDPROC(blowfish_ecb_dec_32way)
ENTRY(blowfish_cbc_dec_32way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
vzeroupper;
vmovdqu 0*32(%rdx), RXl0;
vmovdqu 1*32(%rdx), RXr0;
vmovdqu 2*32(%rdx), RXl1;
vmovdqu 3*32(%rdx), RXr1;
vmovdqu 4*32(%rdx), RXl2;
vmovdqu 5*32(%rdx), RXr2;
vmovdqu 6*32(%rdx), RXl3;
vmovdqu 7*32(%rdx), RXr3;
call __blowfish_dec_blk32;
/* xor with src */
vmovq (%rdx), RT0x;
vpshufd $0x4f, RT0x, RT0x;
vinserti128 $1, 8(%rdx), RT0, RT0;
vpxor RT0, RXr0, RXr0;
vpxor 0*32+24(%rdx), RXl0, RXl0;
vpxor 1*32+24(%rdx), RXr1, RXr1;
vpxor 2*32+24(%rdx), RXl1, RXl1;
vpxor 3*32+24(%rdx), RXr2, RXr2;
vpxor 4*32+24(%rdx), RXl2, RXl2;
vpxor 5*32+24(%rdx), RXr3, RXr3;
vpxor 6*32+24(%rdx), RXl3, RXl3;
vmovdqu RXr0, (0*32)(%rsi);
vmovdqu RXl0, (1*32)(%rsi);
vmovdqu RXr1, (2*32)(%rsi);
vmovdqu RXl1, (3*32)(%rsi);
vmovdqu RXr2, (4*32)(%rsi);
vmovdqu RXl2, (5*32)(%rsi);
vmovdqu RXr3, (6*32)(%rsi);
vmovdqu RXl3, (7*32)(%rsi);
vzeroupper;
ret;
ENDPROC(blowfish_cbc_dec_32way)
ENTRY(blowfish_ctr_32way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
* %rcx: iv (big endian, 64bit)
*/
vzeroupper;
vpcmpeqd RT0, RT0, RT0;
vpsrldq $8, RT0, RT0; /* a: -1, b: 0, c: -1, d: 0 */
vpcmpeqd RT1x, RT1x, RT1x;
vpaddq RT1x, RT1x, RT1x; /* a: -2, b: -2 */
vpxor RIDX0, RIDX0, RIDX0;
vinserti128 $1, RT1x, RIDX0, RIDX0; /* a: 0, b: 0, c: -2, d: -2 */
vpaddq RIDX0, RT0, RT0; /* a: -1, b: 0, c: -3, d: -2 */
vpcmpeqd RT1, RT1, RT1;
vpaddq RT1, RT1, RT1; /* a: -2, b: -2, c: -2, d: -2 */
vpaddq RT1, RT1, RIDX2; /* a: -4, b: -4, c: -4, d: -4 */
vbroadcasti128 .Lbswap_iv_mask, RIDX0;
vbroadcasti128 .Lbswap128_mask, RIDX1;
/* load IV and byteswap */
vmovq (%rcx), RT1x;
vinserti128 $1, RT1x, RT1, RT1; /* a: BE, b: 0, c: BE, d: 0 */
vpshufb RIDX0, RT1, RT1; /* a: LE, b: LE, c: LE, d: LE */
/* construct IVs */
vpsubq RT0, RT1, RT1; /* a: le1, b: le0, c: le3, d: le2 */
vpshufb RIDX1, RT1, RXl0; /* a: be0, b: be1, c: be2, d: be3 */
vpsubq RIDX2, RT1, RT1; /* le5, le4, le7, le6 */
vpshufb RIDX1, RT1, RXr0; /* be4, be5, be6, be7 */
vpsubq RIDX2, RT1, RT1;
vpshufb RIDX1, RT1, RXl1;
vpsubq RIDX2, RT1, RT1;
vpshufb RIDX1, RT1, RXr1;
vpsubq RIDX2, RT1, RT1;
vpshufb RIDX1, RT1, RXl2;
vpsubq RIDX2, RT1, RT1;
vpshufb RIDX1, RT1, RXr2;
vpsubq RIDX2, RT1, RT1;
vpshufb RIDX1, RT1, RXl3;
vpsubq RIDX2, RT1, RT1;
vpshufb RIDX1, RT1, RXr3;
/* store last IV */
vpsubq RIDX2, RT1, RT1; /* a: le33, b: le32, ... */
vpshufb RIDX1x, RT1x, RT1x; /* a: be32, ... */
vmovq RT1x, (%rcx);
call __blowfish_enc_blk32;
/* dst = src ^ iv */
vpxor 0*32(%rdx), RXr0, RXr0;
vpxor 1*32(%rdx), RXl0, RXl0;
vpxor 2*32(%rdx), RXr1, RXr1;
vpxor 3*32(%rdx), RXl1, RXl1;
vpxor 4*32(%rdx), RXr2, RXr2;
vpxor 5*32(%rdx), RXl2, RXl2;
vpxor 6*32(%rdx), RXr3, RXr3;
vpxor 7*32(%rdx), RXl3, RXl3;
vmovdqu RXr0, (0*32)(%rsi);
vmovdqu RXl0, (1*32)(%rsi);
vmovdqu RXr1, (2*32)(%rsi);
vmovdqu RXl1, (3*32)(%rsi);
vmovdqu RXr2, (4*32)(%rsi);
vmovdqu RXl2, (5*32)(%rsi);
vmovdqu RXr3, (6*32)(%rsi);
vmovdqu RXl3, (7*32)(%rsi);
vzeroupper;
ret;
ENDPROC(blowfish_ctr_32way)

View File

@ -1,585 +0,0 @@
/*
* Glue Code for x86_64/AVX2 assembler optimized version of Blowfish
*
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
* CTR part based on code (crypto/ctr.c) by:
* (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/algapi.h>
#include <crypto/blowfish.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <asm/i387.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/blowfish.h>
#include <asm/crypto/ablk_helper.h>
#include <crypto/scatterwalk.h>
#define BF_AVX2_PARALLEL_BLOCKS 32
/* 32-way AVX2 parallel cipher functions */
asmlinkage void blowfish_ecb_enc_32way(struct bf_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void blowfish_ecb_dec_32way(struct bf_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void blowfish_cbc_dec_32way(struct bf_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void blowfish_ctr_32way(struct bf_ctx *ctx, u8 *dst, const u8 *src,
__be64 *iv);
static inline bool bf_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
if (fpu_enabled)
return true;
/* FPU is only used when chunk to be processed is large enough, so
* do not enable FPU until it is necessary.
*/
if (nbytes < BF_BLOCK_SIZE * BF_AVX2_PARALLEL_BLOCKS)
return false;
kernel_fpu_begin();
return true;
}
static inline void bf_fpu_end(bool fpu_enabled)
{
if (fpu_enabled)
kernel_fpu_end();
}
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
bool enc)
{
bool fpu_enabled = false;
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes;
int err;
err = blkcipher_walk_virt(desc, walk);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
while ((nbytes = walk->nbytes)) {
u8 *wsrc = walk->src.virt.addr;
u8 *wdst = walk->dst.virt.addr;
fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
/* Process multi-block AVX2 batch */
if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
do {
if (enc)
blowfish_ecb_enc_32way(ctx, wdst, wsrc);
else
blowfish_ecb_dec_32way(ctx, wdst, wsrc);
wsrc += bsize * BF_AVX2_PARALLEL_BLOCKS;
wdst += bsize * BF_AVX2_PARALLEL_BLOCKS;
nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS;
} while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
if (nbytes < bsize)
goto done;
}
/* Process multi-block batch */
if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
do {
if (enc)
blowfish_enc_blk_4way(ctx, wdst, wsrc);
else
blowfish_dec_blk_4way(ctx, wdst, wsrc);
wsrc += bsize * BF_PARALLEL_BLOCKS;
wdst += bsize * BF_PARALLEL_BLOCKS;
nbytes -= bsize * BF_PARALLEL_BLOCKS;
} while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
do {
if (enc)
blowfish_enc_blk(ctx, wdst, wsrc);
else
blowfish_dec_blk(ctx, wdst, wsrc);
wsrc += bsize;
wdst += bsize;
nbytes -= bsize;
} while (nbytes >= bsize);
done:
err = blkcipher_walk_done(desc, walk, nbytes);
}
bf_fpu_end(fpu_enabled);
return err;
}
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return ecb_crypt(desc, &walk, true);
}
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
blkcipher_walk_init(&walk, dst, src, nbytes);
return ecb_crypt(desc, &walk, false);
}
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
u64 *iv = (u64 *)walk->iv;
do {
*dst = *src ^ *iv;
blowfish_enc_blk(ctx, (u8 *)dst, (u8 *)dst);
iv = dst;
src += 1;
dst += 1;
nbytes -= bsize;
} while (nbytes >= bsize);
*(u64 *)walk->iv = *iv;
return nbytes;
}
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
while ((nbytes = walk.nbytes)) {
nbytes = __cbc_encrypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
return err;
}
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
const unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
u64 last_iv;
int i;
/* Start of the last block. */
src += nbytes / bsize - 1;
dst += nbytes / bsize - 1;
last_iv = *src;
/* Process multi-block AVX2 batch */
if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
do {
nbytes -= bsize * (BF_AVX2_PARALLEL_BLOCKS - 1);
src -= BF_AVX2_PARALLEL_BLOCKS - 1;
dst -= BF_AVX2_PARALLEL_BLOCKS - 1;
blowfish_cbc_dec_32way(ctx, (u8 *)dst, (u8 *)src);
nbytes -= bsize;
if (nbytes < bsize)
goto done;
*dst ^= *(src - 1);
src -= 1;
dst -= 1;
} while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
if (nbytes < bsize)
goto done;
}
/* Process multi-block batch */
if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
u64 ivs[BF_PARALLEL_BLOCKS - 1];
do {
nbytes -= bsize * (BF_PARALLEL_BLOCKS - 1);
src -= BF_PARALLEL_BLOCKS - 1;
dst -= BF_PARALLEL_BLOCKS - 1;
for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++)
ivs[i] = src[i];
blowfish_dec_blk_4way(ctx, (u8 *)dst, (u8 *)src);
for (i = 0; i < BF_PARALLEL_BLOCKS - 1; i++)
dst[i + 1] ^= ivs[i];
nbytes -= bsize;
if (nbytes < bsize)
goto done;
*dst ^= *(src - 1);
src -= 1;
dst -= 1;
} while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
for (;;) {
blowfish_dec_blk(ctx, (u8 *)dst, (u8 *)src);
nbytes -= bsize;
if (nbytes < bsize)
break;
*dst ^= *(src - 1);
src -= 1;
dst -= 1;
}
done:
*dst ^= *(u64 *)walk->iv;
*(u64 *)walk->iv = last_iv;
return nbytes;
}
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
bool fpu_enabled = false;
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt(desc, &walk);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
while ((nbytes = walk.nbytes)) {
fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
nbytes = __cbc_decrypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
bf_fpu_end(fpu_enabled);
return err;
}
static void ctr_crypt_final(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
u8 *ctrblk = walk->iv;
u8 keystream[BF_BLOCK_SIZE];
u8 *src = walk->src.virt.addr;
u8 *dst = walk->dst.virt.addr;
unsigned int nbytes = walk->nbytes;
blowfish_enc_blk(ctx, keystream, ctrblk);
crypto_xor(keystream, src, nbytes);
memcpy(dst, keystream, nbytes);
crypto_inc(ctrblk, BF_BLOCK_SIZE);
}
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
struct blkcipher_walk *walk)
{
struct bf_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
unsigned int bsize = BF_BLOCK_SIZE;
unsigned int nbytes = walk->nbytes;
u64 *src = (u64 *)walk->src.virt.addr;
u64 *dst = (u64 *)walk->dst.virt.addr;
int i;
/* Process multi-block AVX2 batch */
if (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS) {
do {
blowfish_ctr_32way(ctx, (u8 *)dst, (u8 *)src,
(__be64 *)walk->iv);
src += BF_AVX2_PARALLEL_BLOCKS;
dst += BF_AVX2_PARALLEL_BLOCKS;
nbytes -= bsize * BF_AVX2_PARALLEL_BLOCKS;
} while (nbytes >= bsize * BF_AVX2_PARALLEL_BLOCKS);
if (nbytes < bsize)
goto done;
}
/* Process four block batch */
if (nbytes >= bsize * BF_PARALLEL_BLOCKS) {
__be64 ctrblocks[BF_PARALLEL_BLOCKS];
u64 ctrblk = be64_to_cpu(*(__be64 *)walk->iv);
do {
/* create ctrblks for parallel encrypt */
for (i = 0; i < BF_PARALLEL_BLOCKS; i++) {
if (dst != src)
dst[i] = src[i];
ctrblocks[i] = cpu_to_be64(ctrblk++);
}
blowfish_enc_blk_xor_4way(ctx, (u8 *)dst,
(u8 *)ctrblocks);
src += BF_PARALLEL_BLOCKS;
dst += BF_PARALLEL_BLOCKS;
nbytes -= bsize * BF_PARALLEL_BLOCKS;
} while (nbytes >= bsize * BF_PARALLEL_BLOCKS);
*(__be64 *)walk->iv = cpu_to_be64(ctrblk);
if (nbytes < bsize)
goto done;
}
/* Handle leftovers */
do {
u64 ctrblk;
if (dst != src)
*dst = *src;
ctrblk = *(u64 *)walk->iv;
be64_add_cpu((__be64 *)walk->iv, 1);
blowfish_enc_blk_xor(ctx, (u8 *)dst, (u8 *)&ctrblk);
src += 1;
dst += 1;
} while ((nbytes -= bsize) >= bsize);
done:
return nbytes;
}
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
bool fpu_enabled = false;
struct blkcipher_walk walk;
int err;
blkcipher_walk_init(&walk, dst, src, nbytes);
err = blkcipher_walk_virt_block(desc, &walk, BF_BLOCK_SIZE);
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
while ((nbytes = walk.nbytes) >= BF_BLOCK_SIZE) {
fpu_enabled = bf_fpu_begin(fpu_enabled, nbytes);
nbytes = __ctr_crypt(desc, &walk);
err = blkcipher_walk_done(desc, &walk, nbytes);
}
bf_fpu_end(fpu_enabled);
if (walk.nbytes) {
ctr_crypt_final(desc, &walk);
err = blkcipher_walk_done(desc, &walk, 0);
}
return err;
}
static struct crypto_alg bf_algs[6] = { {
.cra_name = "__ecb-blowfish-avx2",
.cra_driver_name = "__driver-ecb-blowfish-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = BF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct bf_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = BF_MIN_KEY_SIZE,
.max_keysize = BF_MAX_KEY_SIZE,
.setkey = blowfish_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
},
},
}, {
.cra_name = "__cbc-blowfish-avx2",
.cra_driver_name = "__driver-cbc-blowfish-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = BF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct bf_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = BF_MIN_KEY_SIZE,
.max_keysize = BF_MAX_KEY_SIZE,
.setkey = blowfish_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
},
},
}, {
.cra_name = "__ctr-blowfish-avx2",
.cra_driver_name = "__driver-ctr-blowfish-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct bf_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = BF_MIN_KEY_SIZE,
.max_keysize = BF_MAX_KEY_SIZE,
.ivsize = BF_BLOCK_SIZE,
.setkey = blowfish_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
},
},
}, {
.cra_name = "ecb(blowfish)",
.cra_driver_name = "ecb-blowfish-avx2",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = BF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = BF_MIN_KEY_SIZE,
.max_keysize = BF_MAX_KEY_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "cbc(blowfish)",
.cra_driver_name = "cbc-blowfish-avx2",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = BF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = BF_MIN_KEY_SIZE,
.max_keysize = BF_MAX_KEY_SIZE,
.ivsize = BF_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = __ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "ctr(blowfish)",
.cra_driver_name = "ctr-blowfish-avx2",
.cra_priority = 400,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = BF_MIN_KEY_SIZE,
.max_keysize = BF_MAX_KEY_SIZE,
.ivsize = BF_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_encrypt,
.geniv = "chainiv",
},
},
} };
static int __init init(void)
{
u64 xcr0;
if (!cpu_has_avx2 || !cpu_has_osxsave) {
pr_info("AVX2 instructions are not detected.\n");
return -ENODEV;
}
xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
pr_info("AVX detected but unusable.\n");
return -ENODEV;
}
return crypto_register_algs(bf_algs, ARRAY_SIZE(bf_algs));
}
static void __exit fini(void)
{
crypto_unregister_algs(bf_algs, ARRAY_SIZE(bf_algs));
}
module_init(init);
module_exit(fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Blowfish Cipher Algorithm, AVX2 optimized");
MODULE_ALIAS("blowfish");
MODULE_ALIAS("blowfish-asm");

View File

@ -1,7 +1,7 @@
/*
* Glue Code for assembler optimized version of Blowfish
*
* Copyright © 2011-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
* Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
* Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
@ -32,24 +32,40 @@
#include <linux/module.h>
#include <linux/types.h>
#include <crypto/algapi.h>
#include <asm/crypto/blowfish.h>
/* regular block cipher functions */
asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
bool xor);
EXPORT_SYMBOL_GPL(__blowfish_enc_blk);
asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
EXPORT_SYMBOL_GPL(blowfish_dec_blk);
/* 4-way parallel cipher functions */
asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
EXPORT_SYMBOL_GPL(__blowfish_enc_blk_4way);
asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(blowfish_dec_blk_4way);
static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
{
__blowfish_enc_blk(ctx, dst, src, false);
}
static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk(ctx, dst, src, true);
}
static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk_4way(ctx, dst, src, false);
}
static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk_4way(ctx, dst, src, true);
}
static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src)
{

View File

@ -51,16 +51,6 @@
#define ymm14_x xmm14
#define ymm15_x xmm15
/*
* AES-NI instructions do not support ymmX registers, so we need splitting and
* merging.
*/
#define vaesenclast256(zero, yreg, tmp) \
vextracti128 $1, yreg, tmp##_x; \
vaesenclast zero##_x, yreg##_x, yreg##_x; \
vaesenclast zero##_x, tmp##_x, tmp##_x; \
vinserti128 $1, tmp##_x, yreg, yreg;
/**********************************************************************
32-way camellia
**********************************************************************/
@ -79,46 +69,70 @@
* S-function with AES subbytes \
*/ \
vbroadcasti128 .Linv_shift_row, t4; \
vpbroadcastb .L0f0f0f0f, t7; \
vbroadcasti128 .Lpre_tf_lo_s1, t0; \
vbroadcasti128 .Lpre_tf_hi_s1, t1; \
vpbroadcastd .L0f0f0f0f, t7; \
vbroadcasti128 .Lpre_tf_lo_s1, t5; \
vbroadcasti128 .Lpre_tf_hi_s1, t6; \
vbroadcasti128 .Lpre_tf_lo_s4, t2; \
vbroadcasti128 .Lpre_tf_hi_s4, t3; \
\
/* AES inverse shift rows */ \
vpshufb t4, x0, x0; \
vpshufb t4, x7, x7; \
vpshufb t4, x1, x1; \
vpshufb t4, x4, x4; \
vpshufb t4, x2, x2; \
vpshufb t4, x5, x5; \
vpshufb t4, x3, x3; \
vpshufb t4, x6, x6; \
vpshufb t4, x2, x2; \
vpshufb t4, x5, x5; \
vpshufb t4, x1, x1; \
vpshufb t4, x4, x4; \
\
/* prefilter sboxes 1, 2 and 3 */ \
vbroadcasti128 .Lpre_tf_lo_s4, t2; \
vbroadcasti128 .Lpre_tf_hi_s4, t3; \
filter_8bit(x0, t0, t1, t7, t6); \
filter_8bit(x7, t0, t1, t7, t6); \
filter_8bit(x1, t0, t1, t7, t6); \
filter_8bit(x4, t0, t1, t7, t6); \
filter_8bit(x2, t0, t1, t7, t6); \
filter_8bit(x5, t0, t1, t7, t6); \
\
/* prefilter sbox 4 */ \
filter_8bit(x0, t5, t6, t7, t4); \
filter_8bit(x7, t5, t6, t7, t4); \
vextracti128 $1, x0, t0##_x; \
vextracti128 $1, x7, t1##_x; \
filter_8bit(x3, t2, t3, t7, t4); \
filter_8bit(x6, t2, t3, t7, t4); \
vextracti128 $1, x3, t3##_x; \
vextracti128 $1, x6, t2##_x; \
filter_8bit(x2, t5, t6, t7, t4); \
filter_8bit(x5, t5, t6, t7, t4); \
filter_8bit(x1, t5, t6, t7, t4); \
filter_8bit(x4, t5, t6, t7, t4); \
\
vpxor t4##_x, t4##_x, t4##_x; \
filter_8bit(x3, t2, t3, t7, t6); \
filter_8bit(x6, t2, t3, t7, t6); \
\
/* AES subbytes + AES shift rows */ \
vextracti128 $1, x2, t6##_x; \
vextracti128 $1, x5, t5##_x; \
vaesenclast t4##_x, x0##_x, x0##_x; \
vaesenclast t4##_x, t0##_x, t0##_x; \
vinserti128 $1, t0##_x, x0, x0; \
vaesenclast t4##_x, x7##_x, x7##_x; \
vaesenclast t4##_x, t1##_x, t1##_x; \
vinserti128 $1, t1##_x, x7, x7; \
vaesenclast t4##_x, x3##_x, x3##_x; \
vaesenclast t4##_x, t3##_x, t3##_x; \
vinserti128 $1, t3##_x, x3, x3; \
vaesenclast t4##_x, x6##_x, x6##_x; \
vaesenclast t4##_x, t2##_x, t2##_x; \
vinserti128 $1, t2##_x, x6, x6; \
vextracti128 $1, x1, t3##_x; \
vextracti128 $1, x4, t2##_x; \
vbroadcasti128 .Lpost_tf_lo_s1, t0; \
vbroadcasti128 .Lpost_tf_hi_s1, t1; \
vaesenclast256(t4, x0, t5); \
vaesenclast256(t4, x7, t5); \
vaesenclast256(t4, x1, t5); \
vaesenclast256(t4, x4, t5); \
vaesenclast256(t4, x2, t5); \
vaesenclast256(t4, x5, t5); \
vaesenclast256(t4, x3, t5); \
vaesenclast256(t4, x6, t5); \
vaesenclast t4##_x, x2##_x, x2##_x; \
vaesenclast t4##_x, t6##_x, t6##_x; \
vinserti128 $1, t6##_x, x2, x2; \
vaesenclast t4##_x, x5##_x, x5##_x; \
vaesenclast t4##_x, t5##_x, t5##_x; \
vinserti128 $1, t5##_x, x5, x5; \
vaesenclast t4##_x, x1##_x, x1##_x; \
vaesenclast t4##_x, t3##_x, t3##_x; \
vinserti128 $1, t3##_x, x1, x1; \
vaesenclast t4##_x, x4##_x, x4##_x; \
vaesenclast t4##_x, t2##_x, t2##_x; \
vinserti128 $1, t2##_x, x4, x4; \
\
/* postfilter sboxes 1 and 4 */ \
vbroadcasti128 .Lpost_tf_lo_s3, t2; \
@ -139,22 +153,12 @@
/* postfilter sbox 2 */ \
filter_8bit(x1, t4, t5, t7, t2); \
filter_8bit(x4, t4, t5, t7, t2); \
vpxor t7, t7, t7; \
\
vpsrldq $1, t0, t1; \
vpsrldq $2, t0, t2; \
vpshufb t7, t1, t1; \
vpsrldq $3, t0, t3; \
vpsrldq $4, t0, t4; \
vpsrldq $5, t0, t5; \
vpsrldq $6, t0, t6; \
vpsrldq $7, t0, t7; \
vpbroadcastb t0##_x, t0; \
vpbroadcastb t1##_x, t1; \
vpbroadcastb t2##_x, t2; \
vpbroadcastb t3##_x, t3; \
vpbroadcastb t4##_x, t4; \
vpbroadcastb t6##_x, t6; \
vpbroadcastb t5##_x, t5; \
vpbroadcastb t7##_x, t7; \
\
/* P-function */ \
vpxor x5, x0, x0; \
@ -162,11 +166,21 @@
vpxor x7, x2, x2; \
vpxor x4, x3, x3; \
\
vpshufb t7, t2, t2; \
vpsrldq $4, t0, t4; \
vpshufb t7, t3, t3; \
vpsrldq $5, t0, t5; \
vpshufb t7, t4, t4; \
\
vpxor x2, x4, x4; \
vpxor x3, x5, x5; \
vpxor x0, x6, x6; \
vpxor x1, x7, x7; \
\
vpsrldq $6, t0, t6; \
vpshufb t7, t5, t5; \
vpshufb t7, t6, t6; \
\
vpxor x7, x0, x0; \
vpxor x4, x1, x1; \
vpxor x5, x2, x2; \
@ -179,12 +193,16 @@
\
/* Add key material and result to CD (x becomes new CD) */ \
\
vpxor t7, x0, x0; \
vpxor 4 * 32(mem_cd), x0, x0; \
\
vpxor t6, x1, x1; \
vpxor 5 * 32(mem_cd), x1, x1; \
\
vpsrldq $7, t0, t6; \
vpshufb t7, t0, t0; \
vpshufb t7, t6, t7; \
\
vpxor t7, x0, x0; \
vpxor 4 * 32(mem_cd), x0, x0; \
\
vpxor t5, x2, x2; \
vpxor 6 * 32(mem_cd), x2, x2; \
\
@ -204,7 +222,7 @@
vpxor 3 * 32(mem_cd), x7, x7;
/*
* Size optimization... with inlined roundsm16 binary would be over 5 times
* Size optimization... with inlined roundsm32 binary would be over 5 times
* larger and would only marginally faster.
*/
.align 8
@ -324,13 +342,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
*/ \
vpbroadcastd kll, t0; /* only lowest 32-bit used */ \
vpxor tt0, tt0, tt0; \
vpbroadcastb t0##_x, t3; \
vpshufb tt0, t0, t3; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t2; \
vpshufb tt0, t0, t2; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t1; \
vpshufb tt0, t0, t1; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t0; \
vpshufb tt0, t0, t0; \
\
vpand l0, t0, t0; \
vpand l1, t1, t1; \
@ -340,6 +358,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \
\
vpxor l4, t0, l4; \
vpbroadcastd krr, t0; /* only lowest 32-bit used */ \
vmovdqu l4, 4 * 32(l); \
vpxor l5, t1, l5; \
vmovdqu l5, 5 * 32(l); \
@ -354,14 +373,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
* rl ^= t2; \
*/ \
\
vpbroadcastd krr, t0; /* only lowest 32-bit used */ \
vpbroadcastb t0##_x, t3; \
vpshufb tt0, t0, t3; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t2; \
vpshufb tt0, t0, t2; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t1; \
vpshufb tt0, t0, t1; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t0; \
vpshufb tt0, t0, t0; \
\
vpor 4 * 32(r), t0, t0; \
vpor 5 * 32(r), t1, t1; \
@ -373,6 +391,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
vpxor 2 * 32(r), t2, t2; \
vpxor 3 * 32(r), t3, t3; \
vmovdqu t0, 0 * 32(r); \
vpbroadcastd krl, t0; /* only lowest 32-bit used */ \
vmovdqu t1, 1 * 32(r); \
vmovdqu t2, 2 * 32(r); \
vmovdqu t3, 3 * 32(r); \
@ -382,14 +401,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
* t2 &= rl; \
* rr ^= rol32(t2, 1); \
*/ \
vpbroadcastd krl, t0; /* only lowest 32-bit used */ \
vpbroadcastb t0##_x, t3; \
vpshufb tt0, t0, t3; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t2; \
vpshufb tt0, t0, t2; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t1; \
vpshufb tt0, t0, t1; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t0; \
vpshufb tt0, t0, t0; \
\
vpand 0 * 32(r), t0, t0; \
vpand 1 * 32(r), t1, t1; \
@ -403,6 +421,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
vpxor 6 * 32(r), t2, t2; \
vpxor 7 * 32(r), t3, t3; \
vmovdqu t0, 4 * 32(r); \
vpbroadcastd klr, t0; /* only lowest 32-bit used */ \
vmovdqu t1, 5 * 32(r); \
vmovdqu t2, 6 * 32(r); \
vmovdqu t3, 7 * 32(r); \
@ -413,14 +432,13 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
* ll ^= t0; \
*/ \
\
vpbroadcastd klr, t0; /* only lowest 32-bit used */ \
vpbroadcastb t0##_x, t3; \
vpshufb tt0, t0, t3; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t2; \
vpshufb tt0, t0, t2; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t1; \
vpshufb tt0, t0, t1; \
vpsrldq $1, t0, t0; \
vpbroadcastb t0##_x, t0; \
vpshufb tt0, t0, t0; \
\
vpor l4, t0, t0; \
vpor l5, t1, t1; \

View File

@ -0,0 +1,643 @@
########################################################################
# Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
#
# Copyright (c) 2013, Intel Corporation
#
# Authors:
# Erdinc Ozturk <erdinc.ozturk@intel.com>
# Vinodh Gopal <vinodh.gopal@intel.com>
# James Guilford <james.guilford@intel.com>
# Tim Chen <tim.c.chen@linux.intel.com>
#
# This software is available to you under a choice of one of two
# licenses. You may choose to be licensed under the terms of the GNU
# General Public License (GPL) Version 2, available from the file
# COPYING in the main directory of this source tree, or the
# OpenIB.org BSD license below:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the
# distribution.
#
# * Neither the name of the Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
#
# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
########################################################################
# Function API:
# UINT16 crc_t10dif_pcl(
# UINT16 init_crc, //initial CRC value, 16 bits
# const unsigned char *buf, //buffer pointer to calculate CRC on
# UINT64 len //buffer length in bytes (64-bit data)
# );
#
# Reference paper titled "Fast CRC Computation for Generic
# Polynomials Using PCLMULQDQ Instruction"
# URL: http://www.intel.com/content/dam/www/public/us/en/documents
# /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
#
#
#include <linux/linkage.h>
.text
#define arg1 %rdi
#define arg2 %rsi
#define arg3 %rdx
#define arg1_low32 %edi
ENTRY(crc_t10dif_pcl)
.align 16
# adjust the 16-bit initial_crc value, scale it to 32 bits
shl $16, arg1_low32
# Allocate Stack Space
mov %rsp, %rcx
sub $16*2, %rsp
# align stack to 16 byte boundary
and $~(0x10 - 1), %rsp
# check if smaller than 256
cmp $256, arg3
# for sizes less than 128, we can't fold 64B at a time...
jl _less_than_128
# load the initial crc value
movd arg1_low32, %xmm10 # initial crc
# crc value does not need to be byte-reflected, but it needs
# to be moved to the high part of the register.
# because data will be byte-reflected and will align with
# initial crc at correct place.
pslldq $12, %xmm10
movdqa SHUF_MASK(%rip), %xmm11
# receive the initial 64B data, xor the initial crc value
movdqu 16*0(arg2), %xmm0
movdqu 16*1(arg2), %xmm1
movdqu 16*2(arg2), %xmm2
movdqu 16*3(arg2), %xmm3
movdqu 16*4(arg2), %xmm4
movdqu 16*5(arg2), %xmm5
movdqu 16*6(arg2), %xmm6
movdqu 16*7(arg2), %xmm7
pshufb %xmm11, %xmm0
# XOR the initial_crc value
pxor %xmm10, %xmm0
pshufb %xmm11, %xmm1
pshufb %xmm11, %xmm2
pshufb %xmm11, %xmm3
pshufb %xmm11, %xmm4
pshufb %xmm11, %xmm5
pshufb %xmm11, %xmm6
pshufb %xmm11, %xmm7
movdqa rk3(%rip), %xmm10 #xmm10 has rk3 and rk4
#imm value of pclmulqdq instruction
#will determine which constant to use
#################################################################
# we subtract 256 instead of 128 to save one instruction from the loop
sub $256, arg3
# at this section of the code, there is 64*x+y (0<=y<64) bytes of
# buffer. The _fold_64_B_loop will fold 64B at a time
# until we have 64+y Bytes of buffer
# fold 64B at a time. This section of the code folds 4 xmm
# registers in parallel
_fold_64_B_loop:
# update the buffer pointer
add $128, arg2 # buf += 64#
movdqu 16*0(arg2), %xmm9
movdqu 16*1(arg2), %xmm12
pshufb %xmm11, %xmm9
pshufb %xmm11, %xmm12
movdqa %xmm0, %xmm8
movdqa %xmm1, %xmm13
pclmulqdq $0x0 , %xmm10, %xmm0
pclmulqdq $0x11, %xmm10, %xmm8
pclmulqdq $0x0 , %xmm10, %xmm1
pclmulqdq $0x11, %xmm10, %xmm13
pxor %xmm9 , %xmm0
xorps %xmm8 , %xmm0
pxor %xmm12, %xmm1
xorps %xmm13, %xmm1
movdqu 16*2(arg2), %xmm9
movdqu 16*3(arg2), %xmm12
pshufb %xmm11, %xmm9
pshufb %xmm11, %xmm12
movdqa %xmm2, %xmm8
movdqa %xmm3, %xmm13
pclmulqdq $0x0, %xmm10, %xmm2
pclmulqdq $0x11, %xmm10, %xmm8
pclmulqdq $0x0, %xmm10, %xmm3
pclmulqdq $0x11, %xmm10, %xmm13
pxor %xmm9 , %xmm2
xorps %xmm8 , %xmm2
pxor %xmm12, %xmm3
xorps %xmm13, %xmm3
movdqu 16*4(arg2), %xmm9
movdqu 16*5(arg2), %xmm12
pshufb %xmm11, %xmm9
pshufb %xmm11, %xmm12
movdqa %xmm4, %xmm8
movdqa %xmm5, %xmm13
pclmulqdq $0x0, %xmm10, %xmm4
pclmulqdq $0x11, %xmm10, %xmm8
pclmulqdq $0x0, %xmm10, %xmm5
pclmulqdq $0x11, %xmm10, %xmm13
pxor %xmm9 , %xmm4
xorps %xmm8 , %xmm4
pxor %xmm12, %xmm5
xorps %xmm13, %xmm5
movdqu 16*6(arg2), %xmm9
movdqu 16*7(arg2), %xmm12
pshufb %xmm11, %xmm9
pshufb %xmm11, %xmm12
movdqa %xmm6 , %xmm8
movdqa %xmm7 , %xmm13
pclmulqdq $0x0 , %xmm10, %xmm6
pclmulqdq $0x11, %xmm10, %xmm8
pclmulqdq $0x0 , %xmm10, %xmm7
pclmulqdq $0x11, %xmm10, %xmm13
pxor %xmm9 , %xmm6
xorps %xmm8 , %xmm6
pxor %xmm12, %xmm7
xorps %xmm13, %xmm7
sub $128, arg3
# check if there is another 64B in the buffer to be able to fold
jge _fold_64_B_loop
##################################################################
add $128, arg2
# at this point, the buffer pointer is pointing at the last y Bytes
# of the buffer the 64B of folded data is in 4 of the xmm
# registers: xmm0, xmm1, xmm2, xmm3
# fold the 8 xmm registers to 1 xmm register with different constants
movdqa rk9(%rip), %xmm10
movdqa %xmm0, %xmm8
pclmulqdq $0x11, %xmm10, %xmm0
pclmulqdq $0x0 , %xmm10, %xmm8
pxor %xmm8, %xmm7
xorps %xmm0, %xmm7
movdqa rk11(%rip), %xmm10
movdqa %xmm1, %xmm8
pclmulqdq $0x11, %xmm10, %xmm1
pclmulqdq $0x0 , %xmm10, %xmm8
pxor %xmm8, %xmm7
xorps %xmm1, %xmm7
movdqa rk13(%rip), %xmm10
movdqa %xmm2, %xmm8
pclmulqdq $0x11, %xmm10, %xmm2
pclmulqdq $0x0 , %xmm10, %xmm8
pxor %xmm8, %xmm7
pxor %xmm2, %xmm7
movdqa rk15(%rip), %xmm10
movdqa %xmm3, %xmm8
pclmulqdq $0x11, %xmm10, %xmm3
pclmulqdq $0x0 , %xmm10, %xmm8
pxor %xmm8, %xmm7
xorps %xmm3, %xmm7
movdqa rk17(%rip), %xmm10
movdqa %xmm4, %xmm8
pclmulqdq $0x11, %xmm10, %xmm4
pclmulqdq $0x0 , %xmm10, %xmm8
pxor %xmm8, %xmm7
pxor %xmm4, %xmm7
movdqa rk19(%rip), %xmm10
movdqa %xmm5, %xmm8
pclmulqdq $0x11, %xmm10, %xmm5
pclmulqdq $0x0 , %xmm10, %xmm8
pxor %xmm8, %xmm7
xorps %xmm5, %xmm7
movdqa rk1(%rip), %xmm10 #xmm10 has rk1 and rk2
#imm value of pclmulqdq instruction
#will determine which constant to use
movdqa %xmm6, %xmm8
pclmulqdq $0x11, %xmm10, %xmm6
pclmulqdq $0x0 , %xmm10, %xmm8
pxor %xmm8, %xmm7
pxor %xmm6, %xmm7
# instead of 64, we add 48 to the loop counter to save 1 instruction
# from the loop instead of a cmp instruction, we use the negative
# flag with the jl instruction
add $128-16, arg3
jl _final_reduction_for_128
# now we have 16+y bytes left to reduce. 16 Bytes is in register xmm7
# and the rest is in memory. We can fold 16 bytes at a time if y>=16
# continue folding 16B at a time
_16B_reduction_loop:
movdqa %xmm7, %xmm8
pclmulqdq $0x11, %xmm10, %xmm7
pclmulqdq $0x0 , %xmm10, %xmm8
pxor %xmm8, %xmm7
movdqu (arg2), %xmm0
pshufb %xmm11, %xmm0
pxor %xmm0 , %xmm7
add $16, arg2
sub $16, arg3
# instead of a cmp instruction, we utilize the flags with the
# jge instruction equivalent of: cmp arg3, 16-16
# check if there is any more 16B in the buffer to be able to fold
jge _16B_reduction_loop
#now we have 16+z bytes left to reduce, where 0<= z < 16.
#first, we reduce the data in the xmm7 register
_final_reduction_for_128:
# check if any more data to fold. If not, compute the CRC of
# the final 128 bits
add $16, arg3
je _128_done
# here we are getting data that is less than 16 bytes.
# since we know that there was data before the pointer, we can
# offset the input pointer before the actual point, to receive
# exactly 16 bytes. after that the registers need to be adjusted.
_get_last_two_xmms:
movdqa %xmm7, %xmm2
movdqu -16(arg2, arg3), %xmm1
pshufb %xmm11, %xmm1
# get rid of the extra data that was loaded before
# load the shift constant
lea pshufb_shf_table+16(%rip), %rax
sub arg3, %rax
movdqu (%rax), %xmm0
# shift xmm2 to the left by arg3 bytes
pshufb %xmm0, %xmm2
# shift xmm7 to the right by 16-arg3 bytes
pxor mask1(%rip), %xmm0
pshufb %xmm0, %xmm7
pblendvb %xmm2, %xmm1 #xmm0 is implicit
# fold 16 Bytes
movdqa %xmm1, %xmm2
movdqa %xmm7, %xmm8
pclmulqdq $0x11, %xmm10, %xmm7
pclmulqdq $0x0 , %xmm10, %xmm8
pxor %xmm8, %xmm7
pxor %xmm2, %xmm7
_128_done:
# compute crc of a 128-bit value
movdqa rk5(%rip), %xmm10 # rk5 and rk6 in xmm10
movdqa %xmm7, %xmm0
#64b fold
pclmulqdq $0x1, %xmm10, %xmm7
pslldq $8 , %xmm0
pxor %xmm0, %xmm7
#32b fold
movdqa %xmm7, %xmm0
pand mask2(%rip), %xmm0
psrldq $12, %xmm7
pclmulqdq $0x10, %xmm10, %xmm7
pxor %xmm0, %xmm7
#barrett reduction
_barrett:
movdqa rk7(%rip), %xmm10 # rk7 and rk8 in xmm10
movdqa %xmm7, %xmm0
pclmulqdq $0x01, %xmm10, %xmm7
pslldq $4, %xmm7
pclmulqdq $0x11, %xmm10, %xmm7
pslldq $4, %xmm7
pxor %xmm0, %xmm7
pextrd $1, %xmm7, %eax
_cleanup:
# scale the result back to 16 bits
shr $16, %eax
mov %rcx, %rsp
ret
########################################################################
.align 16
_less_than_128:
# check if there is enough buffer to be able to fold 16B at a time
cmp $32, arg3
jl _less_than_32
movdqa SHUF_MASK(%rip), %xmm11
# now if there is, load the constants
movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10
movd arg1_low32, %xmm0 # get the initial crc value
pslldq $12, %xmm0 # align it to its correct place
movdqu (arg2), %xmm7 # load the plaintext
pshufb %xmm11, %xmm7 # byte-reflect the plaintext
pxor %xmm0, %xmm7
# update the buffer pointer
add $16, arg2
# update the counter. subtract 32 instead of 16 to save one
# instruction from the loop
sub $32, arg3
jmp _16B_reduction_loop
.align 16
_less_than_32:
# mov initial crc to the return value. this is necessary for
# zero-length buffers.
mov arg1_low32, %eax
test arg3, arg3
je _cleanup
movdqa SHUF_MASK(%rip), %xmm11
movd arg1_low32, %xmm0 # get the initial crc value
pslldq $12, %xmm0 # align it to its correct place
cmp $16, arg3
je _exact_16_left
jl _less_than_16_left
movdqu (arg2), %xmm7 # load the plaintext
pshufb %xmm11, %xmm7 # byte-reflect the plaintext
pxor %xmm0 , %xmm7 # xor the initial crc value
add $16, arg2
sub $16, arg3
movdqa rk1(%rip), %xmm10 # rk1 and rk2 in xmm10
jmp _get_last_two_xmms
.align 16
_less_than_16_left:
# use stack space to load data less than 16 bytes, zero-out
# the 16B in memory first.
pxor %xmm1, %xmm1
mov %rsp, %r11
movdqa %xmm1, (%r11)
cmp $4, arg3
jl _only_less_than_4
# backup the counter value
mov arg3, %r9
cmp $8, arg3
jl _less_than_8_left
# load 8 Bytes
mov (arg2), %rax
mov %rax, (%r11)
add $8, %r11
sub $8, arg3
add $8, arg2
_less_than_8_left:
cmp $4, arg3
jl _less_than_4_left
# load 4 Bytes
mov (arg2), %eax
mov %eax, (%r11)
add $4, %r11
sub $4, arg3
add $4, arg2
_less_than_4_left:
cmp $2, arg3
jl _less_than_2_left
# load 2 Bytes
mov (arg2), %ax
mov %ax, (%r11)
add $2, %r11
sub $2, arg3
add $2, arg2
_less_than_2_left:
cmp $1, arg3
jl _zero_left
# load 1 Byte
mov (arg2), %al
mov %al, (%r11)
_zero_left:
movdqa (%rsp), %xmm7
pshufb %xmm11, %xmm7
pxor %xmm0 , %xmm7 # xor the initial crc value
# shl r9, 4
lea pshufb_shf_table+16(%rip), %rax
sub %r9, %rax
movdqu (%rax), %xmm0
pxor mask1(%rip), %xmm0
pshufb %xmm0, %xmm7
jmp _128_done
.align 16
_exact_16_left:
movdqu (arg2), %xmm7
pshufb %xmm11, %xmm7
pxor %xmm0 , %xmm7 # xor the initial crc value
jmp _128_done
_only_less_than_4:
cmp $3, arg3
jl _only_less_than_3
# load 3 Bytes
mov (arg2), %al
mov %al, (%r11)
mov 1(arg2), %al
mov %al, 1(%r11)
mov 2(arg2), %al
mov %al, 2(%r11)
movdqa (%rsp), %xmm7
pshufb %xmm11, %xmm7
pxor %xmm0 , %xmm7 # xor the initial crc value
psrldq $5, %xmm7
jmp _barrett
_only_less_than_3:
cmp $2, arg3
jl _only_less_than_2
# load 2 Bytes
mov (arg2), %al
mov %al, (%r11)
mov 1(arg2), %al
mov %al, 1(%r11)
movdqa (%rsp), %xmm7
pshufb %xmm11, %xmm7
pxor %xmm0 , %xmm7 # xor the initial crc value
psrldq $6, %xmm7
jmp _barrett
_only_less_than_2:
# load 1 Byte
mov (arg2), %al
mov %al, (%r11)
movdqa (%rsp), %xmm7
pshufb %xmm11, %xmm7
pxor %xmm0 , %xmm7 # xor the initial crc value
psrldq $7, %xmm7
jmp _barrett
ENDPROC(crc_t10dif_pcl)
.data
# precomputed constants
# these constants are precomputed from the poly:
# 0x8bb70000 (0x8bb7 scaled to 32 bits)
.align 16
# Q = 0x18BB70000
# rk1 = 2^(32*3) mod Q << 32
# rk2 = 2^(32*5) mod Q << 32
# rk3 = 2^(32*15) mod Q << 32
# rk4 = 2^(32*17) mod Q << 32
# rk5 = 2^(32*3) mod Q << 32
# rk6 = 2^(32*2) mod Q << 32
# rk7 = floor(2^64/Q)
# rk8 = Q
rk1:
.quad 0x2d56000000000000
rk2:
.quad 0x06df000000000000
rk3:
.quad 0x9d9d000000000000
rk4:
.quad 0x7cf5000000000000
rk5:
.quad 0x2d56000000000000
rk6:
.quad 0x1368000000000000
rk7:
.quad 0x00000001f65a57f8
rk8:
.quad 0x000000018bb70000
rk9:
.quad 0xceae000000000000
rk10:
.quad 0xbfd6000000000000
rk11:
.quad 0x1e16000000000000
rk12:
.quad 0x713c000000000000
rk13:
.quad 0xf7f9000000000000
rk14:
.quad 0x80a6000000000000
rk15:
.quad 0x044c000000000000
rk16:
.quad 0xe658000000000000
rk17:
.quad 0xad18000000000000
rk18:
.quad 0xa497000000000000
rk19:
.quad 0x6ee3000000000000
rk20:
.quad 0xe7b5000000000000
mask1:
.octa 0x80808080808080808080808080808080
mask2:
.octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
SHUF_MASK:
.octa 0x000102030405060708090A0B0C0D0E0F
pshufb_shf_table:
# use these values for shift constants for the pshufb instruction
# different alignments result in values as shown:
# DDQ 0x008f8e8d8c8b8a898887868584838281 # shl 15 (16-1) / shr1
# DDQ 0x01008f8e8d8c8b8a8988878685848382 # shl 14 (16-3) / shr2
# DDQ 0x0201008f8e8d8c8b8a89888786858483 # shl 13 (16-4) / shr3
# DDQ 0x030201008f8e8d8c8b8a898887868584 # shl 12 (16-4) / shr4
# DDQ 0x04030201008f8e8d8c8b8a8988878685 # shl 11 (16-5) / shr5
# DDQ 0x0504030201008f8e8d8c8b8a89888786 # shl 10 (16-6) / shr6
# DDQ 0x060504030201008f8e8d8c8b8a898887 # shl 9 (16-7) / shr7
# DDQ 0x07060504030201008f8e8d8c8b8a8988 # shl 8 (16-8) / shr8
# DDQ 0x0807060504030201008f8e8d8c8b8a89 # shl 7 (16-9) / shr9
# DDQ 0x090807060504030201008f8e8d8c8b8a # shl 6 (16-10) / shr10
# DDQ 0x0a090807060504030201008f8e8d8c8b # shl 5 (16-11) / shr11
# DDQ 0x0b0a090807060504030201008f8e8d8c # shl 4 (16-12) / shr12
# DDQ 0x0c0b0a090807060504030201008f8e8d # shl 3 (16-13) / shr13
# DDQ 0x0d0c0b0a090807060504030201008f8e # shl 2 (16-14) / shr14
# DDQ 0x0e0d0c0b0a090807060504030201008f # shl 1 (16-15) / shr15
.octa 0x8f8e8d8c8b8a89888786858483828100
.octa 0x000e0d0c0b0a09080706050403020100

View File

@ -0,0 +1,151 @@
/*
* Cryptographic API.
*
* T10 Data Integrity Field CRC16 Crypto Transform using PCLMULQDQ Instructions
*
* Copyright (C) 2013 Intel Corporation
* Author: Tim Chen <tim.c.chen@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <linux/types.h>
#include <linux/module.h>
#include <linux/crc-t10dif.h>
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/kernel.h>
#include <asm/i387.h>
#include <asm/cpufeature.h>
#include <asm/cpu_device_id.h>
asmlinkage __u16 crc_t10dif_pcl(__u16 crc, const unsigned char *buf,
size_t len);
struct chksum_desc_ctx {
__u16 crc;
};
/*
* Steps through buffer one byte at at time, calculates reflected
* crc using table.
*/
static int chksum_init(struct shash_desc *desc)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = 0;
return 0;
}
static int chksum_update(struct shash_desc *desc, const u8 *data,
unsigned int length)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
if (irq_fpu_usable()) {
kernel_fpu_begin();
ctx->crc = crc_t10dif_pcl(ctx->crc, data, length);
kernel_fpu_end();
} else
ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
return 0;
}
static int chksum_final(struct shash_desc *desc, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
*(__u16 *)out = ctx->crc;
return 0;
}
static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
u8 *out)
{
if (irq_fpu_usable()) {
kernel_fpu_begin();
*(__u16 *)out = crc_t10dif_pcl(*crcp, data, len);
kernel_fpu_end();
} else
*(__u16 *)out = crc_t10dif_generic(*crcp, data, len);
return 0;
}
static int chksum_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
return __chksum_finup(&ctx->crc, data, len, out);
}
static int chksum_digest(struct shash_desc *desc, const u8 *data,
unsigned int length, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
return __chksum_finup(&ctx->crc, data, length, out);
}
static struct shash_alg alg = {
.digestsize = CRC_T10DIF_DIGEST_SIZE,
.init = chksum_init,
.update = chksum_update,
.final = chksum_final,
.finup = chksum_finup,
.digest = chksum_digest,
.descsize = sizeof(struct chksum_desc_ctx),
.base = {
.cra_name = "crct10dif",
.cra_driver_name = "crct10dif-pclmul",
.cra_priority = 200,
.cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static const struct x86_cpu_id crct10dif_cpu_id[] = {
X86_FEATURE_MATCH(X86_FEATURE_PCLMULQDQ),
{}
};
MODULE_DEVICE_TABLE(x86cpu, crct10dif_cpu_id);
static int __init crct10dif_intel_mod_init(void)
{
if (!x86_match_cpu(crct10dif_cpu_id))
return -ENODEV;
return crypto_register_shash(&alg);
}
static void __exit crct10dif_intel_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
module_init(crct10dif_intel_mod_init);
module_exit(crct10dif_intel_mod_fini);
MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
MODULE_DESCRIPTION("T10 DIF CRC calculation accelerated with PCLMULQDQ.");
MODULE_LICENSE("GPL");
MODULE_ALIAS("crct10dif");
MODULE_ALIAS("crct10dif-pclmul");

View File

@ -187,7 +187,36 @@ static int sha256_ssse3_import(struct shash_desc *desc, const void *in)
return 0;
}
static struct shash_alg alg = {
static int sha224_ssse3_init(struct shash_desc *desc)
{
struct sha256_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA224_H0;
sctx->state[1] = SHA224_H1;
sctx->state[2] = SHA224_H2;
sctx->state[3] = SHA224_H3;
sctx->state[4] = SHA224_H4;
sctx->state[5] = SHA224_H5;
sctx->state[6] = SHA224_H6;
sctx->state[7] = SHA224_H7;
sctx->count = 0;
return 0;
}
static int sha224_ssse3_final(struct shash_desc *desc, u8 *hash)
{
u8 D[SHA256_DIGEST_SIZE];
sha256_ssse3_final(desc, D);
memcpy(hash, D, SHA224_DIGEST_SIZE);
memset(D, 0, SHA256_DIGEST_SIZE);
return 0;
}
static struct shash_alg algs[] = { {
.digestsize = SHA256_DIGEST_SIZE,
.init = sha256_ssse3_init,
.update = sha256_ssse3_update,
@ -204,7 +233,24 @@ static struct shash_alg alg = {
.cra_blocksize = SHA256_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
}, {
.digestsize = SHA224_DIGEST_SIZE,
.init = sha224_ssse3_init,
.update = sha256_ssse3_update,
.final = sha224_ssse3_final,
.export = sha256_ssse3_export,
.import = sha256_ssse3_import,
.descsize = sizeof(struct sha256_state),
.statesize = sizeof(struct sha256_state),
.base = {
.cra_name = "sha224",
.cra_driver_name = "sha224-ssse3",
.cra_priority = 150,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA224_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
#ifdef CONFIG_AS_AVX
static bool __init avx_usable(void)
@ -227,7 +273,7 @@ static bool __init avx_usable(void)
static int __init sha256_ssse3_mod_init(void)
{
/* test for SSE3 first */
/* test for SSSE3 first */
if (cpu_has_ssse3)
sha256_transform_asm = sha256_transform_ssse3;
@ -254,7 +300,7 @@ static int __init sha256_ssse3_mod_init(void)
else
#endif
pr_info("Using SSSE3 optimized SHA-256 implementation\n");
return crypto_register_shash(&alg);
return crypto_register_shashes(algs, ARRAY_SIZE(algs));
}
pr_info("Neither AVX nor SSSE3 is available/usable.\n");
@ -263,7 +309,7 @@ static int __init sha256_ssse3_mod_init(void)
static void __exit sha256_ssse3_mod_fini(void)
{
crypto_unregister_shash(&alg);
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
module_init(sha256_ssse3_mod_init);
@ -273,3 +319,4 @@ MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA256 Secure Hash Algorithm, Supplemental SSE3 accelerated");
MODULE_ALIAS("sha256");
MODULE_ALIAS("sha384");

View File

@ -194,7 +194,37 @@ static int sha512_ssse3_import(struct shash_desc *desc, const void *in)
return 0;
}
static struct shash_alg alg = {
static int sha384_ssse3_init(struct shash_desc *desc)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA384_H0;
sctx->state[1] = SHA384_H1;
sctx->state[2] = SHA384_H2;
sctx->state[3] = SHA384_H3;
sctx->state[4] = SHA384_H4;
sctx->state[5] = SHA384_H5;
sctx->state[6] = SHA384_H6;
sctx->state[7] = SHA384_H7;
sctx->count[0] = sctx->count[1] = 0;
return 0;
}
static int sha384_ssse3_final(struct shash_desc *desc, u8 *hash)
{
u8 D[SHA512_DIGEST_SIZE];
sha512_ssse3_final(desc, D);
memcpy(hash, D, SHA384_DIGEST_SIZE);
memset(D, 0, SHA512_DIGEST_SIZE);
return 0;
}
static struct shash_alg algs[] = { {
.digestsize = SHA512_DIGEST_SIZE,
.init = sha512_ssse3_init,
.update = sha512_ssse3_update,
@ -211,7 +241,24 @@ static struct shash_alg alg = {
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
}, {
.digestsize = SHA384_DIGEST_SIZE,
.init = sha384_ssse3_init,
.update = sha512_ssse3_update,
.final = sha384_ssse3_final,
.export = sha512_ssse3_export,
.import = sha512_ssse3_import,
.descsize = sizeof(struct sha512_state),
.statesize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha384",
.cra_driver_name = "sha384-ssse3",
.cra_priority = 150,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
#ifdef CONFIG_AS_AVX
static bool __init avx_usable(void)
@ -234,7 +281,7 @@ static bool __init avx_usable(void)
static int __init sha512_ssse3_mod_init(void)
{
/* test for SSE3 first */
/* test for SSSE3 first */
if (cpu_has_ssse3)
sha512_transform_asm = sha512_transform_ssse3;
@ -261,7 +308,7 @@ static int __init sha512_ssse3_mod_init(void)
else
#endif
pr_info("Using SSSE3 optimized SHA-512 implementation\n");
return crypto_register_shash(&alg);
return crypto_register_shashes(algs, ARRAY_SIZE(algs));
}
pr_info("Neither AVX nor SSSE3 is available/usable.\n");
@ -270,7 +317,7 @@ static int __init sha512_ssse3_mod_init(void)
static void __exit sha512_ssse3_mod_fini(void)
{
crypto_unregister_shash(&alg);
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
module_init(sha512_ssse3_mod_init);
@ -280,3 +327,4 @@ MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, Supplemental SSE3 accelerated");
MODULE_ALIAS("sha512");
MODULE_ALIAS("sha384");

View File

@ -1,600 +0,0 @@
/*
* x86_64/AVX2 assembler optimized version of Twofish
*
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
*/
#include <linux/linkage.h>
#include "glue_helper-asm-avx2.S"
.file "twofish-avx2-asm_64.S"
.data
.align 16
.Lvpshufb_mask0:
.long 0x80808000
.long 0x80808004
.long 0x80808008
.long 0x8080800c
.Lbswap128_mask:
.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
.Lxts_gf128mul_and_shl1_mask_0:
.byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0
.Lxts_gf128mul_and_shl1_mask_1:
.byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0
.text
/* structure of crypto context */
#define s0 0
#define s1 1024
#define s2 2048
#define s3 3072
#define w 4096
#define k 4128
/* register macros */
#define CTX %rdi
#define RS0 CTX
#define RS1 %r8
#define RS2 %r9
#define RS3 %r10
#define RK %r11
#define RW %rax
#define RROUND %r12
#define RROUNDd %r12d
#define RA0 %ymm8
#define RB0 %ymm9
#define RC0 %ymm10
#define RD0 %ymm11
#define RA1 %ymm12
#define RB1 %ymm13
#define RC1 %ymm14
#define RD1 %ymm15
/* temp regs */
#define RX0 %ymm0
#define RY0 %ymm1
#define RX1 %ymm2
#define RY1 %ymm3
#define RT0 %ymm4
#define RIDX %ymm5
#define RX0x %xmm0
#define RY0x %xmm1
#define RX1x %xmm2
#define RY1x %xmm3
#define RT0x %xmm4
/* vpgatherdd mask and '-1' */
#define RNOT %ymm6
/* byte mask, (-1 >> 24) */
#define RBYTE %ymm7
/**********************************************************************
16-way AVX2 twofish
**********************************************************************/
#define init_round_constants() \
vpcmpeqd RNOT, RNOT, RNOT; \
vpsrld $24, RNOT, RBYTE; \
leaq k(CTX), RK; \
leaq w(CTX), RW; \
leaq s1(CTX), RS1; \
leaq s2(CTX), RS2; \
leaq s3(CTX), RS3; \
#define g16(ab, rs0, rs1, rs2, rs3, xy) \
vpand RBYTE, ab ## 0, RIDX; \
vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 0; \
vpcmpeqd RNOT, RNOT, RNOT; \
\
vpand RBYTE, ab ## 1, RIDX; \
vpgatherdd RNOT, (rs0, RIDX, 4), xy ## 1; \
vpcmpeqd RNOT, RNOT, RNOT; \
\
vpsrld $8, ab ## 0, RIDX; \
vpand RBYTE, RIDX, RIDX; \
vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \
vpcmpeqd RNOT, RNOT, RNOT; \
vpxor RT0, xy ## 0, xy ## 0; \
\
vpsrld $8, ab ## 1, RIDX; \
vpand RBYTE, RIDX, RIDX; \
vpgatherdd RNOT, (rs1, RIDX, 4), RT0; \
vpcmpeqd RNOT, RNOT, RNOT; \
vpxor RT0, xy ## 1, xy ## 1; \
\
vpsrld $16, ab ## 0, RIDX; \
vpand RBYTE, RIDX, RIDX; \
vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \
vpcmpeqd RNOT, RNOT, RNOT; \
vpxor RT0, xy ## 0, xy ## 0; \
\
vpsrld $16, ab ## 1, RIDX; \
vpand RBYTE, RIDX, RIDX; \
vpgatherdd RNOT, (rs2, RIDX, 4), RT0; \
vpcmpeqd RNOT, RNOT, RNOT; \
vpxor RT0, xy ## 1, xy ## 1; \
\
vpsrld $24, ab ## 0, RIDX; \
vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \
vpcmpeqd RNOT, RNOT, RNOT; \
vpxor RT0, xy ## 0, xy ## 0; \
\
vpsrld $24, ab ## 1, RIDX; \
vpgatherdd RNOT, (rs3, RIDX, 4), RT0; \
vpcmpeqd RNOT, RNOT, RNOT; \
vpxor RT0, xy ## 1, xy ## 1;
#define g1_16(a, x) \
g16(a, RS0, RS1, RS2, RS3, x);
#define g2_16(b, y) \
g16(b, RS1, RS2, RS3, RS0, y);
#define encrypt_round_end16(a, b, c, d, nk) \
vpaddd RY0, RX0, RX0; \
vpaddd RX0, RY0, RY0; \
vpbroadcastd nk(RK,RROUND,8), RT0; \
vpaddd RT0, RX0, RX0; \
vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
vpaddd RT0, RY0, RY0; \
\
vpxor RY0, d ## 0, d ## 0; \
\
vpxor RX0, c ## 0, c ## 0; \
vpsrld $1, c ## 0, RT0; \
vpslld $31, c ## 0, c ## 0; \
vpor RT0, c ## 0, c ## 0; \
\
vpaddd RY1, RX1, RX1; \
vpaddd RX1, RY1, RY1; \
vpbroadcastd nk(RK,RROUND,8), RT0; \
vpaddd RT0, RX1, RX1; \
vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
vpaddd RT0, RY1, RY1; \
\
vpxor RY1, d ## 1, d ## 1; \
\
vpxor RX1, c ## 1, c ## 1; \
vpsrld $1, c ## 1, RT0; \
vpslld $31, c ## 1, c ## 1; \
vpor RT0, c ## 1, c ## 1; \
#define encrypt_round16(a, b, c, d, nk) \
g2_16(b, RY); \
\
vpslld $1, b ## 0, RT0; \
vpsrld $31, b ## 0, b ## 0; \
vpor RT0, b ## 0, b ## 0; \
\
vpslld $1, b ## 1, RT0; \
vpsrld $31, b ## 1, b ## 1; \
vpor RT0, b ## 1, b ## 1; \
\
g1_16(a, RX); \
\
encrypt_round_end16(a, b, c, d, nk);
#define encrypt_round_first16(a, b, c, d, nk) \
vpslld $1, d ## 0, RT0; \
vpsrld $31, d ## 0, d ## 0; \
vpor RT0, d ## 0, d ## 0; \
\
vpslld $1, d ## 1, RT0; \
vpsrld $31, d ## 1, d ## 1; \
vpor RT0, d ## 1, d ## 1; \
\
encrypt_round16(a, b, c, d, nk);
#define encrypt_round_last16(a, b, c, d, nk) \
g2_16(b, RY); \
\
g1_16(a, RX); \
\
encrypt_round_end16(a, b, c, d, nk);
#define decrypt_round_end16(a, b, c, d, nk) \
vpaddd RY0, RX0, RX0; \
vpaddd RX0, RY0, RY0; \
vpbroadcastd nk(RK,RROUND,8), RT0; \
vpaddd RT0, RX0, RX0; \
vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
vpaddd RT0, RY0, RY0; \
\
vpxor RX0, c ## 0, c ## 0; \
\
vpxor RY0, d ## 0, d ## 0; \
vpsrld $1, d ## 0, RT0; \
vpslld $31, d ## 0, d ## 0; \
vpor RT0, d ## 0, d ## 0; \
\
vpaddd RY1, RX1, RX1; \
vpaddd RX1, RY1, RY1; \
vpbroadcastd nk(RK,RROUND,8), RT0; \
vpaddd RT0, RX1, RX1; \
vpbroadcastd 4+nk(RK,RROUND,8), RT0; \
vpaddd RT0, RY1, RY1; \
\
vpxor RX1, c ## 1, c ## 1; \
\
vpxor RY1, d ## 1, d ## 1; \
vpsrld $1, d ## 1, RT0; \
vpslld $31, d ## 1, d ## 1; \
vpor RT0, d ## 1, d ## 1;
#define decrypt_round16(a, b, c, d, nk) \
g1_16(a, RX); \
\
vpslld $1, a ## 0, RT0; \
vpsrld $31, a ## 0, a ## 0; \
vpor RT0, a ## 0, a ## 0; \
\
vpslld $1, a ## 1, RT0; \
vpsrld $31, a ## 1, a ## 1; \
vpor RT0, a ## 1, a ## 1; \
\
g2_16(b, RY); \
\
decrypt_round_end16(a, b, c, d, nk);
#define decrypt_round_first16(a, b, c, d, nk) \
vpslld $1, c ## 0, RT0; \
vpsrld $31, c ## 0, c ## 0; \
vpor RT0, c ## 0, c ## 0; \
\
vpslld $1, c ## 1, RT0; \
vpsrld $31, c ## 1, c ## 1; \
vpor RT0, c ## 1, c ## 1; \
\
decrypt_round16(a, b, c, d, nk)
#define decrypt_round_last16(a, b, c, d, nk) \
g1_16(a, RX); \
\
g2_16(b, RY); \
\
decrypt_round_end16(a, b, c, d, nk);
#define encrypt_cycle16() \
encrypt_round16(RA, RB, RC, RD, 0); \
encrypt_round16(RC, RD, RA, RB, 8);
#define encrypt_cycle_first16() \
encrypt_round_first16(RA, RB, RC, RD, 0); \
encrypt_round16(RC, RD, RA, RB, 8);
#define encrypt_cycle_last16() \
encrypt_round16(RA, RB, RC, RD, 0); \
encrypt_round_last16(RC, RD, RA, RB, 8);
#define decrypt_cycle16(n) \
decrypt_round16(RC, RD, RA, RB, 8); \
decrypt_round16(RA, RB, RC, RD, 0);
#define decrypt_cycle_first16(n) \
decrypt_round_first16(RC, RD, RA, RB, 8); \
decrypt_round16(RA, RB, RC, RD, 0);
#define decrypt_cycle_last16(n) \
decrypt_round16(RC, RD, RA, RB, 8); \
decrypt_round_last16(RA, RB, RC, RD, 0);
#define transpose_4x4(x0,x1,x2,x3,t1,t2) \
vpunpckhdq x1, x0, t2; \
vpunpckldq x1, x0, x0; \
\
vpunpckldq x3, x2, t1; \
vpunpckhdq x3, x2, x2; \
\
vpunpckhqdq t1, x0, x1; \
vpunpcklqdq t1, x0, x0; \
\
vpunpckhqdq x2, t2, x3; \
vpunpcklqdq x2, t2, x2;
#define read_blocks8(offs,a,b,c,d) \
transpose_4x4(a, b, c, d, RX0, RY0);
#define write_blocks8(offs,a,b,c,d) \
transpose_4x4(a, b, c, d, RX0, RY0);
#define inpack_enc8(a,b,c,d) \
vpbroadcastd 4*0(RW), RT0; \
vpxor RT0, a, a; \
\
vpbroadcastd 4*1(RW), RT0; \
vpxor RT0, b, b; \
\
vpbroadcastd 4*2(RW), RT0; \
vpxor RT0, c, c; \
\
vpbroadcastd 4*3(RW), RT0; \
vpxor RT0, d, d;
#define outunpack_enc8(a,b,c,d) \
vpbroadcastd 4*4(RW), RX0; \
vpbroadcastd 4*5(RW), RY0; \
vpxor RX0, c, RX0; \
vpxor RY0, d, RY0; \
\
vpbroadcastd 4*6(RW), RT0; \
vpxor RT0, a, c; \
vpbroadcastd 4*7(RW), RT0; \
vpxor RT0, b, d; \
\
vmovdqa RX0, a; \
vmovdqa RY0, b;
#define inpack_dec8(a,b,c,d) \
vpbroadcastd 4*4(RW), RX0; \
vpbroadcastd 4*5(RW), RY0; \
vpxor RX0, a, RX0; \
vpxor RY0, b, RY0; \
\
vpbroadcastd 4*6(RW), RT0; \
vpxor RT0, c, a; \
vpbroadcastd 4*7(RW), RT0; \
vpxor RT0, d, b; \
\
vmovdqa RX0, c; \
vmovdqa RY0, d;
#define outunpack_dec8(a,b,c,d) \
vpbroadcastd 4*0(RW), RT0; \
vpxor RT0, a, a; \
\
vpbroadcastd 4*1(RW), RT0; \
vpxor RT0, b, b; \
\
vpbroadcastd 4*2(RW), RT0; \
vpxor RT0, c, c; \
\
vpbroadcastd 4*3(RW), RT0; \
vpxor RT0, d, d;
#define read_blocks16(a,b,c,d) \
read_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \
read_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1);
#define write_blocks16(a,b,c,d) \
write_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \
write_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1);
#define xor_blocks16(a,b,c,d) \
xor_blocks8(0, a ## 0, b ## 0, c ## 0, d ## 0); \
xor_blocks8(8, a ## 1, b ## 1, c ## 1, d ## 1);
#define inpack_enc16(a,b,c,d) \
inpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \
inpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1);
#define outunpack_enc16(a,b,c,d) \
outunpack_enc8(a ## 0, b ## 0, c ## 0, d ## 0); \
outunpack_enc8(a ## 1, b ## 1, c ## 1, d ## 1);
#define inpack_dec16(a,b,c,d) \
inpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \
inpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1);
#define outunpack_dec16(a,b,c,d) \
outunpack_dec8(a ## 0, b ## 0, c ## 0, d ## 0); \
outunpack_dec8(a ## 1, b ## 1, c ## 1, d ## 1);
.align 8
__twofish_enc_blk16:
/* input:
* %rdi: ctx, CTX
* RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: plaintext
* output:
* RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: ciphertext
*/
init_round_constants();
read_blocks16(RA, RB, RC, RD);
inpack_enc16(RA, RB, RC, RD);
xorl RROUNDd, RROUNDd;
encrypt_cycle_first16();
movl $2, RROUNDd;
.align 4
.L__enc_loop:
encrypt_cycle16();
addl $2, RROUNDd;
cmpl $14, RROUNDd;
jne .L__enc_loop;
encrypt_cycle_last16();
outunpack_enc16(RA, RB, RC, RD);
write_blocks16(RA, RB, RC, RD);
ret;
ENDPROC(__twofish_enc_blk16)
.align 8
__twofish_dec_blk16:
/* input:
* %rdi: ctx, CTX
* RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: ciphertext
* output:
* RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1: plaintext
*/
init_round_constants();
read_blocks16(RA, RB, RC, RD);
inpack_dec16(RA, RB, RC, RD);
movl $14, RROUNDd;
decrypt_cycle_first16();
movl $12, RROUNDd;
.align 4
.L__dec_loop:
decrypt_cycle16();
addl $-2, RROUNDd;
jnz .L__dec_loop;
decrypt_cycle_last16();
outunpack_dec16(RA, RB, RC, RD);
write_blocks16(RA, RB, RC, RD);
ret;
ENDPROC(__twofish_dec_blk16)
ENTRY(twofish_ecb_enc_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
vzeroupper;
pushq %r12;
load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
call __twofish_enc_blk16;
store_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
popq %r12;
vzeroupper;
ret;
ENDPROC(twofish_ecb_enc_16way)
ENTRY(twofish_ecb_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
vzeroupper;
pushq %r12;
load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
call __twofish_dec_blk16;
store_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
popq %r12;
vzeroupper;
ret;
ENDPROC(twofish_ecb_dec_16way)
ENTRY(twofish_cbc_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst
* %rdx: src
*/
vzeroupper;
pushq %r12;
load_16way(%rdx, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
call __twofish_dec_blk16;
store_cbc_16way(%rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1,
RX0);
popq %r12;
vzeroupper;
ret;
ENDPROC(twofish_cbc_dec_16way)
ENTRY(twofish_ctr_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
* %rdx: src (16 blocks)
* %rcx: iv (little endian, 128bit)
*/
vzeroupper;
pushq %r12;
load_ctr_16way(%rcx, .Lbswap128_mask, RA0, RB0, RC0, RD0, RA1, RB1, RC1,
RD1, RX0, RX0x, RX1, RX1x, RY0, RY0x, RY1, RY1x, RNOT,
RBYTE);
call __twofish_enc_blk16;
store_ctr_16way(%rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
popq %r12;
vzeroupper;
ret;
ENDPROC(twofish_ctr_16way)
.align 8
twofish_xts_crypt_16way:
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
* %rdx: src (16 blocks)
* %rcx: iv (t α GF(2¹²))
* %r8: pointer to __twofish_enc_blk16 or __twofish_dec_blk16
*/
vzeroupper;
pushq %r12;
load_xts_16way(%rcx, %rdx, %rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1,
RD1, RX0, RX0x, RX1, RX1x, RY0, RY0x, RY1, RY1x, RNOT,
.Lxts_gf128mul_and_shl1_mask_0,
.Lxts_gf128mul_and_shl1_mask_1);
call *%r8;
store_xts_16way(%rsi, RA0, RB0, RC0, RD0, RA1, RB1, RC1, RD1);
popq %r12;
vzeroupper;
ret;
ENDPROC(twofish_xts_crypt_16way)
ENTRY(twofish_xts_enc_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
* %rdx: src (16 blocks)
* %rcx: iv (t α GF(2¹²))
*/
leaq __twofish_enc_blk16, %r8;
jmp twofish_xts_crypt_16way;
ENDPROC(twofish_xts_enc_16way)
ENTRY(twofish_xts_dec_16way)
/* input:
* %rdi: ctx, CTX
* %rsi: dst (16 blocks)
* %rdx: src (16 blocks)
* %rcx: iv (t α GF(2¹²))
*/
leaq __twofish_dec_blk16, %r8;
jmp twofish_xts_crypt_16way;
ENDPROC(twofish_xts_dec_16way)

View File

@ -1,584 +0,0 @@
/*
* Glue Code for x86_64/AVX2 assembler optimized version of Twofish
*
* Copyright © 2012-2013 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
*/
#include <linux/module.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/algapi.h>
#include <crypto/ctr.h>
#include <crypto/twofish.h>
#include <crypto/lrw.h>
#include <crypto/xts.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/twofish.h>
#include <asm/crypto/ablk_helper.h>
#include <asm/crypto/glue_helper.h>
#include <crypto/scatterwalk.h>
#define TF_AVX2_PARALLEL_BLOCKS 16
/* 16-way AVX2 parallel cipher functions */
asmlinkage void twofish_ecb_enc_16way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void twofish_ecb_dec_16way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void twofish_cbc_dec_16way(void *ctx, u128 *dst, const u128 *src);
asmlinkage void twofish_ctr_16way(void *ctx, u128 *dst, const u128 *src,
le128 *iv);
asmlinkage void twofish_xts_enc_16way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
asmlinkage void twofish_xts_dec_16way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src)
{
__twofish_enc_blk_3way(ctx, dst, src, false);
}
static const struct common_glue_ctx twofish_enc = {
.num_funcs = 4,
.fpu_blocks_limit = 8,
.funcs = { {
.num_blocks = 16,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_16way) }
}, {
.num_blocks = 8,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_enc_8way) }
}, {
.num_blocks = 3,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) }
} }
};
static const struct common_glue_ctx twofish_ctr = {
.num_funcs = 4,
.fpu_blocks_limit = 8,
.funcs = { {
.num_blocks = 16,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_16way) }
}, {
.num_blocks = 8,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_ctr_8way) }
}, {
.num_blocks = 3,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr_3way) }
}, {
.num_blocks = 1,
.fn_u = { .ctr = GLUE_CTR_FUNC_CAST(twofish_enc_blk_ctr) }
} }
};
static const struct common_glue_ctx twofish_enc_xts = {
.num_funcs = 3,
.fpu_blocks_limit = 8,
.funcs = { {
.num_blocks = 16,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_16way) }
}, {
.num_blocks = 8,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc_8way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_enc) }
} }
};
static const struct common_glue_ctx twofish_dec = {
.num_funcs = 4,
.fpu_blocks_limit = 8,
.funcs = { {
.num_blocks = 16,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_16way) }
}, {
.num_blocks = 8,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_ecb_dec_8way) }
}, {
.num_blocks = 3,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) }
}, {
.num_blocks = 1,
.fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) }
} }
};
static const struct common_glue_ctx twofish_dec_cbc = {
.num_funcs = 4,
.fpu_blocks_limit = 8,
.funcs = { {
.num_blocks = 16,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_16way) }
}, {
.num_blocks = 8,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_cbc_dec_8way) }
}, {
.num_blocks = 3,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) }
}, {
.num_blocks = 1,
.fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) }
} }
};
static const struct common_glue_ctx twofish_dec_xts = {
.num_funcs = 3,
.fpu_blocks_limit = 8,
.funcs = { {
.num_blocks = 16,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_16way) }
}, {
.num_blocks = 8,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec_8way) }
}, {
.num_blocks = 1,
.fn_u = { .xts = GLUE_XTS_FUNC_CAST(twofish_xts_dec) }
} }
};
static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes);
}
static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes);
}
static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc,
dst, src, nbytes);
}
static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src,
nbytes);
}
static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes);
}
static inline bool twofish_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
/* since reusing AVX functions, starts using FPU at 8 parallel blocks */
return glue_fpu_begin(TF_BLOCK_SIZE, 8, NULL, fpu_enabled, nbytes);
}
static inline void twofish_fpu_end(bool fpu_enabled)
{
glue_fpu_end(fpu_enabled);
}
struct crypt_priv {
struct twofish_ctx *ctx;
bool fpu_enabled;
};
static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
const unsigned int bsize = TF_BLOCK_SIZE;
struct crypt_priv *ctx = priv;
int i;
ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
while (nbytes >= TF_AVX2_PARALLEL_BLOCKS * bsize) {
twofish_ecb_enc_16way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * TF_AVX2_PARALLEL_BLOCKS;
nbytes -= bsize * TF_AVX2_PARALLEL_BLOCKS;
}
while (nbytes >= 8 * bsize) {
twofish_ecb_enc_8way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * 8;
nbytes -= bsize * 8;
}
while (nbytes >= 3 * bsize) {
twofish_enc_blk_3way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * 3;
nbytes -= bsize * 3;
}
for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
twofish_enc_blk(ctx->ctx, srcdst, srcdst);
}
static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
{
const unsigned int bsize = TF_BLOCK_SIZE;
struct crypt_priv *ctx = priv;
int i;
ctx->fpu_enabled = twofish_fpu_begin(ctx->fpu_enabled, nbytes);
while (nbytes >= TF_AVX2_PARALLEL_BLOCKS * bsize) {
twofish_ecb_dec_16way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * TF_AVX2_PARALLEL_BLOCKS;
nbytes -= bsize * TF_AVX2_PARALLEL_BLOCKS;
}
while (nbytes >= 8 * bsize) {
twofish_ecb_dec_8way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * 8;
nbytes -= bsize * 8;
}
while (nbytes >= 3 * bsize) {
twofish_dec_blk_3way(ctx->ctx, srcdst, srcdst);
srcdst += bsize * 3;
nbytes -= bsize * 3;
}
for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
twofish_dec_blk(ctx->ctx, srcdst, srcdst);
}
static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[TF_AVX2_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->twofish_ctx,
.fpu_enabled = false,
};
struct lrw_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.table_ctx = &ctx->lrw_table,
.crypt_ctx = &crypt_ctx,
.crypt_fn = encrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = lrw_crypt(desc, dst, src, nbytes, &req);
twofish_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
be128 buf[TF_AVX2_PARALLEL_BLOCKS];
struct crypt_priv crypt_ctx = {
.ctx = &ctx->twofish_ctx,
.fpu_enabled = false,
};
struct lrw_crypt_req req = {
.tbuf = buf,
.tbuflen = sizeof(buf),
.table_ctx = &ctx->lrw_table,
.crypt_ctx = &crypt_ctx,
.crypt_fn = decrypt_callback,
};
int ret;
desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
ret = lrw_crypt(desc, dst, src, nbytes, &req);
twofish_fpu_end(crypt_ctx.fpu_enabled);
return ret;
}
static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
return glue_xts_crypt_128bit(&twofish_enc_xts, desc, dst, src, nbytes,
XTS_TWEAK_CAST(twofish_enc_blk),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
struct scatterlist *src, unsigned int nbytes)
{
struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
return glue_xts_crypt_128bit(&twofish_dec_xts, desc, dst, src, nbytes,
XTS_TWEAK_CAST(twofish_enc_blk),
&ctx->tweak_ctx, &ctx->crypt_ctx);
}
static struct crypto_alg tf_algs[10] = { {
.cra_name = "__ecb-twofish-avx2",
.cra_driver_name = "__driver-ecb-twofish-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct twofish_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = TF_MIN_KEY_SIZE,
.max_keysize = TF_MAX_KEY_SIZE,
.setkey = twofish_setkey,
.encrypt = ecb_encrypt,
.decrypt = ecb_decrypt,
},
},
}, {
.cra_name = "__cbc-twofish-avx2",
.cra_driver_name = "__driver-cbc-twofish-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct twofish_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = TF_MIN_KEY_SIZE,
.max_keysize = TF_MAX_KEY_SIZE,
.setkey = twofish_setkey,
.encrypt = cbc_encrypt,
.decrypt = cbc_decrypt,
},
},
}, {
.cra_name = "__ctr-twofish-avx2",
.cra_driver_name = "__driver-ctr-twofish-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct twofish_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = TF_MIN_KEY_SIZE,
.max_keysize = TF_MAX_KEY_SIZE,
.ivsize = TF_BLOCK_SIZE,
.setkey = twofish_setkey,
.encrypt = ctr_crypt,
.decrypt = ctr_crypt,
},
},
}, {
.cra_name = "__lrw-twofish-avx2",
.cra_driver_name = "__driver-lrw-twofish-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct twofish_lrw_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_exit = lrw_twofish_exit_tfm,
.cra_u = {
.blkcipher = {
.min_keysize = TF_MIN_KEY_SIZE +
TF_BLOCK_SIZE,
.max_keysize = TF_MAX_KEY_SIZE +
TF_BLOCK_SIZE,
.ivsize = TF_BLOCK_SIZE,
.setkey = lrw_twofish_setkey,
.encrypt = lrw_encrypt,
.decrypt = lrw_decrypt,
},
},
}, {
.cra_name = "__xts-twofish-avx2",
.cra_driver_name = "__driver-xts-twofish-avx2",
.cra_priority = 0,
.cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct twofish_xts_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_blkcipher_type,
.cra_module = THIS_MODULE,
.cra_u = {
.blkcipher = {
.min_keysize = TF_MIN_KEY_SIZE * 2,
.max_keysize = TF_MAX_KEY_SIZE * 2,
.ivsize = TF_BLOCK_SIZE,
.setkey = xts_twofish_setkey,
.encrypt = xts_encrypt,
.decrypt = xts_decrypt,
},
},
}, {
.cra_name = "ecb(twofish)",
.cra_driver_name = "ecb-twofish-avx2",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = TF_MIN_KEY_SIZE,
.max_keysize = TF_MAX_KEY_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "cbc(twofish)",
.cra_driver_name = "cbc-twofish-avx2",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = TF_MIN_KEY_SIZE,
.max_keysize = TF_MAX_KEY_SIZE,
.ivsize = TF_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = __ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "ctr(twofish)",
.cra_driver_name = "ctr-twofish-avx2",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = 1,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = TF_MIN_KEY_SIZE,
.max_keysize = TF_MAX_KEY_SIZE,
.ivsize = TF_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_encrypt,
.geniv = "chainiv",
},
},
}, {
.cra_name = "lrw(twofish)",
.cra_driver_name = "lrw-twofish-avx2",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = TF_MIN_KEY_SIZE +
TF_BLOCK_SIZE,
.max_keysize = TF_MAX_KEY_SIZE +
TF_BLOCK_SIZE,
.ivsize = TF_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
}, {
.cra_name = "xts(twofish)",
.cra_driver_name = "xts-twofish-avx2",
.cra_priority = 500,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
.cra_blocksize = TF_BLOCK_SIZE,
.cra_ctxsize = sizeof(struct async_helper_ctx),
.cra_alignmask = 0,
.cra_type = &crypto_ablkcipher_type,
.cra_module = THIS_MODULE,
.cra_init = ablk_init,
.cra_exit = ablk_exit,
.cra_u = {
.ablkcipher = {
.min_keysize = TF_MIN_KEY_SIZE * 2,
.max_keysize = TF_MAX_KEY_SIZE * 2,
.ivsize = TF_BLOCK_SIZE,
.setkey = ablk_set_key,
.encrypt = ablk_encrypt,
.decrypt = ablk_decrypt,
},
},
} };
static int __init init(void)
{
u64 xcr0;
if (!cpu_has_avx2 || !cpu_has_osxsave) {
pr_info("AVX2 instructions are not detected.\n");
return -ENODEV;
}
xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
pr_info("AVX2 detected but unusable.\n");
return -ENODEV;
}
return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs));
}
static void __exit fini(void)
{
crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs));
}
module_init(init);
module_exit(fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Twofish Cipher Algorithm, AVX2 optimized");
MODULE_ALIAS("twofish");
MODULE_ALIAS("twofish-asm");

View File

@ -50,26 +50,18 @@
/* 8-way parallel cipher functions */
asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(twofish_ecb_enc_8way);
asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(twofish_ecb_dec_8way);
asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
EXPORT_SYMBOL_GPL(twofish_cbc_dec_8way);
asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
EXPORT_SYMBOL_GPL(twofish_ctr_8way);
asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
EXPORT_SYMBOL_GPL(twofish_xts_enc_8way);
asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
EXPORT_SYMBOL_GPL(twofish_xts_dec_8way);
static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src)
@ -77,19 +69,17 @@ static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
__twofish_enc_blk_3way(ctx, dst, src, false);
}
void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
static void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(twofish_enc_blk));
}
EXPORT_SYMBOL_GPL(twofish_xts_enc);
void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
static void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv)
{
glue_xts_crypt_128bit_one(ctx, dst, src, iv,
GLUE_FUNC_CAST(twofish_dec_blk));
}
EXPORT_SYMBOL_GPL(twofish_xts_dec);
static const struct common_glue_ctx twofish_enc = {

View File

@ -1,43 +0,0 @@
#ifndef ASM_X86_BLOWFISH_H
#define ASM_X86_BLOWFISH_H
#include <linux/crypto.h>
#include <crypto/blowfish.h>
#define BF_PARALLEL_BLOCKS 4
/* regular block cipher functions */
asmlinkage void __blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src,
bool xor);
asmlinkage void blowfish_dec_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src);
/* 4-way parallel cipher functions */
asmlinkage void __blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src, bool xor);
asmlinkage void blowfish_dec_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src);
static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src)
{
__blowfish_enc_blk(ctx, dst, src, false);
}
static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk(ctx, dst, src, true);
}
static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk_4way(ctx, dst, src, false);
}
static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst,
const u8 *src)
{
__blowfish_enc_blk_4way(ctx, dst, src, true);
}
#endif

View File

@ -28,20 +28,6 @@ asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst,
asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
/* 8-way parallel cipher functions */
asmlinkage void twofish_ecb_enc_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void twofish_ecb_dec_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void twofish_cbc_dec_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src);
asmlinkage void twofish_ctr_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
asmlinkage void twofish_xts_enc_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
asmlinkage void twofish_xts_dec_8way(struct twofish_ctx *ctx, u8 *dst,
const u8 *src, le128 *iv);
/* helpers from twofish_x86_64-3way module */
extern void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src);
extern void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src,
@ -57,8 +43,4 @@ extern void lrw_twofish_exit_tfm(struct crypto_tfm *tfm);
extern int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key,
unsigned int keylen);
/* helpers from twofish-avx module */
extern void twofish_xts_enc(void *ctx, u128 *dst, const u128 *src, le128 *iv);
extern void twofish_xts_dec(void *ctx, u128 *dst, const u128 *src, le128 *iv);
#endif /* ASM_X86_TWOFISH_H */

View File

@ -376,6 +376,25 @@ config CRYPTO_CRC32_PCLMUL
which will enable any routine to use the CRC-32-IEEE 802.3 checksum
and gain better performance as compared with the table implementation.
config CRYPTO_CRCT10DIF
tristate "CRCT10DIF algorithm"
select CRYPTO_HASH
help
CRC T10 Data Integrity Field computation is being cast as
a crypto transform. This allows for faster crc t10 diff
transforms to be used if they are available.
config CRYPTO_CRCT10DIF_PCLMUL
tristate "CRCT10DIF PCLMULQDQ hardware acceleration"
depends on X86 && 64BIT && CRC_T10DIF
select CRYPTO_HASH
help
For x86_64 processors with SSE4.2 and PCLMULQDQ supported,
CRC T10 DIF PCLMULQDQ computation can be hardware
accelerated PCLMULQDQ instruction. This option will create
'crct10dif-plcmul' module, which is faster when computing the
crct10dif checksum as compared with the generic table implementation.
config CRYPTO_GHASH
tristate "GHASH digest algorithm"
select CRYPTO_GF128MUL
@ -820,25 +839,6 @@ config CRYPTO_BLOWFISH_X86_64
See also:
<http://www.schneier.com/blowfish.html>
config CRYPTO_BLOWFISH_AVX2_X86_64
tristate "Blowfish cipher algorithm (x86_64/AVX2)"
depends on X86 && 64BIT
depends on BROKEN
select CRYPTO_ALGAPI
select CRYPTO_CRYPTD
select CRYPTO_ABLK_HELPER_X86
select CRYPTO_BLOWFISH_COMMON
select CRYPTO_BLOWFISH_X86_64
help
Blowfish cipher algorithm (x86_64/AVX2), by Bruce Schneier.
This is a variable key length cipher which can use keys from 32
bits to 448 bits in length. It's fast, simple and specifically
designed for use on "large microprocessors".
See also:
<http://www.schneier.com/blowfish.html>
config CRYPTO_CAMELLIA
tristate "Camellia cipher algorithms"
depends on CRYPTO
@ -1297,31 +1297,6 @@ config CRYPTO_TWOFISH_AVX_X86_64
See also:
<http://www.schneier.com/twofish.html>
config CRYPTO_TWOFISH_AVX2_X86_64
tristate "Twofish cipher algorithm (x86_64/AVX2)"
depends on X86 && 64BIT
depends on BROKEN
select CRYPTO_ALGAPI
select CRYPTO_CRYPTD
select CRYPTO_ABLK_HELPER_X86
select CRYPTO_GLUE_HELPER_X86
select CRYPTO_TWOFISH_COMMON
select CRYPTO_TWOFISH_X86_64
select CRYPTO_TWOFISH_X86_64_3WAY
select CRYPTO_TWOFISH_AVX_X86_64
select CRYPTO_LRW
select CRYPTO_XTS
help
Twofish cipher algorithm (x86_64/AVX2).
Twofish was submitted as an AES (Advanced Encryption Standard)
candidate cipher by researchers at CounterPane Systems. It is a
16 round block cipher supporting key sizes of 128, 192, and 256
bits.
See also:
<http://www.schneier.com/twofish.html>
comment "Compression"
config CRYPTO_DEFLATE

View File

@ -83,6 +83,7 @@ obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o
obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o
obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o
obj-$(CONFIG_CRYPTO_CRC32) += crc32.o
obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif.o
obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
obj-$(CONFIG_CRYPTO_LZO) += lzo.o
obj-$(CONFIG_CRYPTO_842) += 842.o

178
crypto/crct10dif.c Normal file
View File

@ -0,0 +1,178 @@
/*
* Cryptographic API.
*
* T10 Data Integrity Field CRC16 Crypto Transform
*
* Copyright (c) 2007 Oracle Corporation. All rights reserved.
* Written by Martin K. Petersen <martin.petersen@oracle.com>
* Copyright (C) 2013 Intel Corporation
* Author: Tim Chen <tim.c.chen@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
*/
#include <linux/types.h>
#include <linux/module.h>
#include <linux/crc-t10dif.h>
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/string.h>
#include <linux/kernel.h>
struct chksum_desc_ctx {
__u16 crc;
};
/* Table generated using the following polynomium:
* x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
* gt: 0x8bb7
*/
static const __u16 t10_dif_crc_table[256] = {
0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
};
__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len)
{
unsigned int i;
for (i = 0 ; i < len ; i++)
crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
return crc;
}
EXPORT_SYMBOL(crc_t10dif_generic);
/*
* Steps through buffer one byte at at time, calculates reflected
* crc using table.
*/
static int chksum_init(struct shash_desc *desc)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = 0;
return 0;
}
static int chksum_update(struct shash_desc *desc, const u8 *data,
unsigned int length)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
return 0;
}
static int chksum_final(struct shash_desc *desc, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
*(__u16 *)out = ctx->crc;
return 0;
}
static int __chksum_finup(__u16 *crcp, const u8 *data, unsigned int len,
u8 *out)
{
*(__u16 *)out = crc_t10dif_generic(*crcp, data, len);
return 0;
}
static int chksum_finup(struct shash_desc *desc, const u8 *data,
unsigned int len, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
return __chksum_finup(&ctx->crc, data, len, out);
}
static int chksum_digest(struct shash_desc *desc, const u8 *data,
unsigned int length, u8 *out)
{
struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
return __chksum_finup(&ctx->crc, data, length, out);
}
static struct shash_alg alg = {
.digestsize = CRC_T10DIF_DIGEST_SIZE,
.init = chksum_init,
.update = chksum_update,
.final = chksum_final,
.finup = chksum_finup,
.digest = chksum_digest,
.descsize = sizeof(struct chksum_desc_ctx),
.base = {
.cra_name = "crct10dif",
.cra_driver_name = "crct10dif-generic",
.cra_priority = 100,
.cra_blocksize = CRC_T10DIF_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static int __init crct10dif_mod_init(void)
{
int ret;
ret = crypto_register_shash(&alg);
return ret;
}
static void __exit crct10dif_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
module_init(crct10dif_mod_init);
module_exit(crct10dif_mod_fini);
MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
MODULE_DESCRIPTION("T10 DIF CRC calculation.");
MODULE_LICENSE("GPL");

View File

@ -251,6 +251,7 @@ static struct shash_alg sha512_algs[2] = { {
.descsize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha512",
.cra_driver_name = "sha512-generic",
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
@ -263,6 +264,7 @@ static struct shash_alg sha512_algs[2] = { {
.descsize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha384",
.cra_driver_name = "sha384-generic",
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_module = THIS_MODULE,

View File

@ -1174,6 +1174,10 @@ static int do_test(int m)
ret += tcrypt_test("ghash");
break;
case 47:
ret += tcrypt_test("crct10dif");
break;
case 100:
ret += tcrypt_test("hmac(md5)");
break;
@ -1498,6 +1502,10 @@ static int do_test(int m)
test_hash_speed("crc32c", sec, generic_hash_speed_template);
if (mode > 300 && mode < 400) break;
case 320:
test_hash_speed("crct10dif", sec, generic_hash_speed_template);
if (mode > 300 && mode < 400) break;
case 399:
break;

View File

@ -184,8 +184,9 @@ static int do_one_async_hash_op(struct ahash_request *req,
return ret;
}
static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
unsigned int tcount, bool use_digest)
static int __test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
unsigned int tcount, bool use_digest,
const int align_offset)
{
const char *algo = crypto_tfm_alg_driver_name(crypto_ahash_tfm(tfm));
unsigned int i, j, k, temp;
@ -216,10 +217,15 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
if (template[i].np)
continue;
ret = -EINVAL;
if (WARN_ON(align_offset + template[i].psize > PAGE_SIZE))
goto out;
j++;
memset(result, 0, 64);
hash_buff = xbuf[0];
hash_buff += align_offset;
memcpy(hash_buff, template[i].plaintext, template[i].psize);
sg_init_one(&sg[0], hash_buff, template[i].psize);
@ -281,6 +287,10 @@ static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
j = 0;
for (i = 0; i < tcount; i++) {
/* alignment tests are only done with continuous buffers */
if (align_offset != 0)
break;
if (template[i].np) {
j++;
memset(result, 0, 64);
@ -358,9 +368,36 @@ out_nobuf:
return ret;
}
static int test_hash(struct crypto_ahash *tfm, struct hash_testvec *template,
unsigned int tcount, bool use_digest)
{
unsigned int alignmask;
int ret;
ret = __test_hash(tfm, template, tcount, use_digest, 0);
if (ret)
return ret;
/* test unaligned buffers, check with one byte offset */
ret = __test_hash(tfm, template, tcount, use_digest, 1);
if (ret)
return ret;
alignmask = crypto_tfm_alg_alignmask(&tfm->base);
if (alignmask) {
/* Check if alignment mask for tfm is correctly set. */
ret = __test_hash(tfm, template, tcount, use_digest,
alignmask + 1);
if (ret)
return ret;
}
return 0;
}
static int __test_aead(struct crypto_aead *tfm, int enc,
struct aead_testvec *template, unsigned int tcount,
const bool diff_dst)
const bool diff_dst, const int align_offset)
{
const char *algo = crypto_tfm_alg_driver_name(crypto_aead_tfm(tfm));
unsigned int i, j, k, n, temp;
@ -423,15 +460,16 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
if (!template[i].np) {
j++;
/* some tepmplates have no input data but they will
/* some templates have no input data but they will
* touch input
*/
input = xbuf[0];
input += align_offset;
assoc = axbuf[0];
ret = -EINVAL;
if (WARN_ON(template[i].ilen > PAGE_SIZE ||
template[i].alen > PAGE_SIZE))
if (WARN_ON(align_offset + template[i].ilen >
PAGE_SIZE || template[i].alen > PAGE_SIZE))
goto out;
memcpy(input, template[i].input, template[i].ilen);
@ -470,6 +508,7 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
if (diff_dst) {
output = xoutbuf[0];
output += align_offset;
sg_init_one(&sgout[0], output,
template[i].ilen +
(enc ? authsize : 0));
@ -530,6 +569,10 @@ static int __test_aead(struct crypto_aead *tfm, int enc,
}
for (i = 0, j = 0; i < tcount; i++) {
/* alignment tests are only done with continuous buffers */
if (align_offset != 0)
break;
if (template[i].np) {
j++;
@ -732,15 +775,34 @@ out_noxbuf:
static int test_aead(struct crypto_aead *tfm, int enc,
struct aead_testvec *template, unsigned int tcount)
{
unsigned int alignmask;
int ret;
/* test 'dst == src' case */
ret = __test_aead(tfm, enc, template, tcount, false);
ret = __test_aead(tfm, enc, template, tcount, false, 0);
if (ret)
return ret;
/* test 'dst != src' case */
return __test_aead(tfm, enc, template, tcount, true);
ret = __test_aead(tfm, enc, template, tcount, true, 0);
if (ret)
return ret;
/* test unaligned buffers, check with one byte offset */
ret = __test_aead(tfm, enc, template, tcount, true, 1);
if (ret)
return ret;
alignmask = crypto_tfm_alg_alignmask(&tfm->base);
if (alignmask) {
/* Check if alignment mask for tfm is correctly set. */
ret = __test_aead(tfm, enc, template, tcount, true,
alignmask + 1);
if (ret)
return ret;
}
return 0;
}
static int test_cipher(struct crypto_cipher *tfm, int enc,
@ -820,7 +882,7 @@ out_nobuf:
static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc,
struct cipher_testvec *template, unsigned int tcount,
const bool diff_dst)
const bool diff_dst, const int align_offset)
{
const char *algo =
crypto_tfm_alg_driver_name(crypto_ablkcipher_tfm(tfm));
@ -876,10 +938,12 @@ static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc,
j++;
ret = -EINVAL;
if (WARN_ON(template[i].ilen > PAGE_SIZE))
if (WARN_ON(align_offset + template[i].ilen >
PAGE_SIZE))
goto out;
data = xbuf[0];
data += align_offset;
memcpy(data, template[i].input, template[i].ilen);
crypto_ablkcipher_clear_flags(tfm, ~0);
@ -900,6 +964,7 @@ static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc,
sg_init_one(&sg[0], data, template[i].ilen);
if (diff_dst) {
data = xoutbuf[0];
data += align_offset;
sg_init_one(&sgout[0], data, template[i].ilen);
}
@ -941,6 +1006,9 @@ static int __test_skcipher(struct crypto_ablkcipher *tfm, int enc,
j = 0;
for (i = 0; i < tcount; i++) {
/* alignment tests are only done with continuous buffers */
if (align_offset != 0)
break;
if (template[i].iv)
memcpy(iv, template[i].iv, MAX_IVLEN);
@ -1075,15 +1143,34 @@ out_nobuf:
static int test_skcipher(struct crypto_ablkcipher *tfm, int enc,
struct cipher_testvec *template, unsigned int tcount)
{
unsigned int alignmask;
int ret;
/* test 'dst == src' case */
ret = __test_skcipher(tfm, enc, template, tcount, false);
ret = __test_skcipher(tfm, enc, template, tcount, false, 0);
if (ret)
return ret;
/* test 'dst != src' case */
return __test_skcipher(tfm, enc, template, tcount, true);
ret = __test_skcipher(tfm, enc, template, tcount, true, 0);
if (ret)
return ret;
/* test unaligned buffers, check with one byte offset */
ret = __test_skcipher(tfm, enc, template, tcount, true, 1);
if (ret)
return ret;
alignmask = crypto_tfm_alg_alignmask(&tfm->base);
if (alignmask) {
/* Check if alignment mask for tfm is correctly set. */
ret = __test_skcipher(tfm, enc, template, tcount, true,
alignmask + 1);
if (ret)
return ret;
}
return 0;
}
static int test_comp(struct crypto_comp *tfm, struct comp_testvec *ctemplate,
@ -1653,16 +1740,10 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "__cbc-twofish-avx",
.test = alg_test_null,
}, {
.alg = "__cbc-twofish-avx2",
.test = alg_test_null,
}, {
.alg = "__driver-cbc-aes-aesni",
.test = alg_test_null,
.fips_allowed = 1,
}, {
.alg = "__driver-cbc-blowfish-avx2",
.test = alg_test_null,
}, {
.alg = "__driver-cbc-camellia-aesni",
.test = alg_test_null,
@ -1687,16 +1768,10 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "__driver-cbc-twofish-avx",
.test = alg_test_null,
}, {
.alg = "__driver-cbc-twofish-avx2",
.test = alg_test_null,
}, {
.alg = "__driver-ecb-aes-aesni",
.test = alg_test_null,
.fips_allowed = 1,
}, {
.alg = "__driver-ecb-blowfish-avx2",
.test = alg_test_null,
}, {
.alg = "__driver-ecb-camellia-aesni",
.test = alg_test_null,
@ -1721,9 +1796,6 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "__driver-ecb-twofish-avx",
.test = alg_test_null,
}, {
.alg = "__driver-ecb-twofish-avx2",
.test = alg_test_null,
}, {
.alg = "__ghash-pclmulqdqni",
.test = alg_test_null,
@ -1973,13 +2045,20 @@ static const struct alg_test_desc alg_test_descs[] = {
.count = CRC32C_TEST_VECTORS
}
}
}, {
.alg = "crct10dif",
.test = alg_test_hash,
.fips_allowed = 1,
.suite = {
.hash = {
.vecs = crct10dif_tv_template,
.count = CRCT10DIF_TEST_VECTORS
}
}
}, {
.alg = "cryptd(__driver-cbc-aes-aesni)",
.test = alg_test_null,
.fips_allowed = 1,
}, {
.alg = "cryptd(__driver-cbc-blowfish-avx2)",
.test = alg_test_null,
}, {
.alg = "cryptd(__driver-cbc-camellia-aesni)",
.test = alg_test_null,
@ -1993,9 +2072,6 @@ static const struct alg_test_desc alg_test_descs[] = {
.alg = "cryptd(__driver-ecb-aes-aesni)",
.test = alg_test_null,
.fips_allowed = 1,
}, {
.alg = "cryptd(__driver-ecb-blowfish-avx2)",
.test = alg_test_null,
}, {
.alg = "cryptd(__driver-ecb-camellia-aesni)",
.test = alg_test_null,
@ -2020,9 +2096,6 @@ static const struct alg_test_desc alg_test_descs[] = {
}, {
.alg = "cryptd(__driver-ecb-twofish-avx)",
.test = alg_test_null,
}, {
.alg = "cryptd(__driver-ecb-twofish-avx2)",
.test = alg_test_null,
}, {
.alg = "cryptd(__driver-gcm-aes-aesni)",
.test = alg_test_null,
@ -3068,6 +3141,35 @@ static const struct alg_test_desc alg_test_descs[] = {
}
};
static bool alg_test_descs_checked;
static void alg_test_descs_check_order(void)
{
int i;
/* only check once */
if (alg_test_descs_checked)
return;
alg_test_descs_checked = true;
for (i = 1; i < ARRAY_SIZE(alg_test_descs); i++) {
int diff = strcmp(alg_test_descs[i - 1].alg,
alg_test_descs[i].alg);
if (WARN_ON(diff > 0)) {
pr_warn("testmgr: alg_test_descs entries in wrong order: '%s' before '%s'\n",
alg_test_descs[i - 1].alg,
alg_test_descs[i].alg);
}
if (WARN_ON(diff == 0)) {
pr_warn("testmgr: duplicate alg_test_descs entry: '%s'\n",
alg_test_descs[i].alg);
}
}
}
static int alg_find_test(const char *alg)
{
int start = 0;
@ -3099,6 +3201,8 @@ int alg_test(const char *driver, const char *alg, u32 type, u32 mask)
int j;
int rc;
alg_test_descs_check_order();
if ((type & CRYPTO_ALG_TYPE_MASK) == CRYPTO_ALG_TYPE_CIPHER) {
char nalg[CRYPTO_MAX_ALG_NAME];

View File

@ -450,6 +450,39 @@ static struct hash_testvec rmd320_tv_template[] = {
}
};
#define CRCT10DIF_TEST_VECTORS 3
static struct hash_testvec crct10dif_tv_template[] = {
{
.plaintext = "abc",
.psize = 3,
#ifdef __LITTLE_ENDIAN
.digest = "\x3b\x44",
#else
.digest = "\x44\x3b",
#endif
}, {
.plaintext = "1234567890123456789012345678901234567890"
"123456789012345678901234567890123456789",
.psize = 79,
#ifdef __LITTLE_ENDIAN
.digest = "\x70\x4b",
#else
.digest = "\x4b\x70",
#endif
}, {
.plaintext =
"abcddddddddddddddddddddddddddddddddddddddddddddddddddddd",
.psize = 56,
#ifdef __LITTLE_ENDIAN
.digest = "\xe3\x9c",
#else
.digest = "\x9c\xe3",
#endif
.np = 2,
.tap = { 28, 28 }
}
};
/*
* SHA1 test vectors from from FIPS PUB 180-1
* Long vector from CAVS 5.0

View File

@ -108,8 +108,6 @@ static int atmel_trng_remove(struct platform_device *pdev)
clk_disable(trng->clk);
clk_put(trng->clk);
platform_set_drvdata(pdev, NULL);
return 0;
}

View File

@ -137,7 +137,6 @@ static int bcm63xx_rng_probe(struct platform_device *pdev)
out_clk_disable:
clk_disable(clk);
out_free_rng:
platform_set_drvdata(pdev, NULL);
kfree(rng);
out_free_priv:
kfree(priv);
@ -154,7 +153,6 @@ static int bcm63xx_rng_remove(struct platform_device *pdev)
clk_disable(priv->clk);
kfree(priv);
kfree(rng);
platform_set_drvdata(pdev, NULL);
return 0;
}

View File

@ -700,7 +700,7 @@ static int n2rng_probe(struct platform_device *op)
if (err)
goto out_free_units;
dev_set_drvdata(&op->dev, np);
platform_set_drvdata(op, np);
schedule_delayed_work(&np->work, 0);
@ -721,7 +721,7 @@ out:
static int n2rng_remove(struct platform_device *op)
{
struct n2rng *np = dev_get_drvdata(&op->dev);
struct n2rng *np = platform_get_drvdata(op);
np->flags |= N2RNG_FLAG_SHUTDOWN;
@ -736,8 +736,6 @@ static int n2rng_remove(struct platform_device *op)
kfree(np);
dev_set_drvdata(&op->dev, NULL);
return 0;
}

View File

@ -51,7 +51,7 @@ static int nmk_rng_probe(struct amba_device *dev, const struct amba_id *id)
return ret;
}
clk_enable(rng_clk);
clk_prepare_enable(rng_clk);
ret = amba_request_regions(dev, dev->dev.init_name);
if (ret)

View File

@ -96,7 +96,7 @@ static int octeon_rng_probe(struct platform_device *pdev)
rng->ops = ops;
dev_set_drvdata(&pdev->dev, &rng->ops);
platform_set_drvdata(pdev, &rng->ops);
ret = hwrng_register(&rng->ops);
if (ret)
return -ENOENT;
@ -108,7 +108,7 @@ static int octeon_rng_probe(struct platform_device *pdev)
static int __exit octeon_rng_remove(struct platform_device *pdev)
{
struct hwrng *rng = dev_get_drvdata(&pdev->dev);
struct hwrng *rng = platform_get_drvdata(pdev);
hwrng_unregister(rng);

View File

@ -116,7 +116,7 @@ static int omap_rng_probe(struct platform_device *pdev)
};
omap_rng_ops.priv = (unsigned long)priv;
dev_set_drvdata(&pdev->dev, priv);
platform_set_drvdata(pdev, priv);
priv->mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
priv->base = devm_ioremap_resource(&pdev->dev, priv->mem_res);
@ -124,7 +124,7 @@ static int omap_rng_probe(struct platform_device *pdev)
ret = PTR_ERR(priv->base);
goto err_ioremap;
}
dev_set_drvdata(&pdev->dev, priv);
platform_set_drvdata(pdev, priv);
pm_runtime_enable(&pdev->dev);
pm_runtime_get_sync(&pdev->dev);
@ -151,7 +151,7 @@ err_ioremap:
static int __exit omap_rng_remove(struct platform_device *pdev)
{
struct omap_rng_private_data *priv = dev_get_drvdata(&pdev->dev);
struct omap_rng_private_data *priv = platform_get_drvdata(pdev);
hwrng_unregister(&omap_rng_ops);

View File

@ -192,7 +192,6 @@ out_release_io:
out_timer:
del_timer_sync(&priv->timer);
out_free:
platform_set_drvdata(pdev, NULL);
kfree(priv);
return err;
}
@ -209,7 +208,6 @@ static int timeriomem_rng_remove(struct platform_device *pdev)
del_timer_sync(&priv->timer);
iounmap(priv->io_base);
release_mem_region(res->start, resource_size(res));
platform_set_drvdata(pdev, NULL);
kfree(priv);
return 0;

View File

@ -154,7 +154,6 @@ static int __exit tx4939_rng_remove(struct platform_device *dev)
struct tx4939_rng *rngdev = platform_get_drvdata(dev);
hwrng_unregister(&rngdev->rng);
platform_set_drvdata(dev, NULL);
return 0;
}

View File

@ -278,7 +278,7 @@ config CRYPTO_DEV_PICOXCELL
config CRYPTO_DEV_SAHARA
tristate "Support for SAHARA crypto accelerator"
depends on ARCH_MXC && EXPERIMENTAL && OF
depends on ARCH_MXC && OF
select CRYPTO_BLKCIPHER
select CRYPTO_AES
select CRYPTO_ECB
@ -286,6 +286,16 @@ config CRYPTO_DEV_SAHARA
This option enables support for the SAHARA HW crypto accelerator
found in some Freescale i.MX chips.
config CRYPTO_DEV_DCP
tristate "Support for the DCP engine"
depends on ARCH_MXS && OF
select CRYPTO_BLKCIPHER
select CRYPTO_AES
select CRYPTO_CBC
help
This options enables support for the hardware crypto-acceleration
capabilities of the DCP co-processor
config CRYPTO_DEV_S5P
tristate "Support for Samsung S5PV210 crypto accelerator"
depends on ARCH_S5PV210

View File

@ -13,6 +13,7 @@ obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o
obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o
obj-$(CONFIG_CRYPTO_DEV_PICOXCELL) += picoxcell_crypto.o
obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o
obj-$(CONFIG_CRYPTO_DEV_DCP) += dcp.o
obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o
obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o
obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/

View File

@ -202,6 +202,7 @@ static int caam_probe(struct platform_device *pdev)
#ifdef CONFIG_DEBUG_FS
struct caam_perfmon *perfmon;
#endif
u64 cha_vid;
ctrlpriv = kzalloc(sizeof(struct caam_drv_private), GFP_KERNEL);
if (!ctrlpriv)
@ -293,11 +294,14 @@ static int caam_probe(struct platform_device *pdev)
return -ENOMEM;
}
cha_vid = rd_reg64(&topregs->ctrl.perfmon.cha_id);
/*
* RNG4 based SECs (v5+) need special initialization prior
* to executing any descriptors
* If SEC has RNG version >= 4 and RNG state handle has not been
* already instantiated ,do RNG instantiation
*/
if (of_device_is_compatible(nprop, "fsl,sec-v5.0")) {
if ((cha_vid & CHA_ID_RNG_MASK) >> CHA_ID_RNG_SHIFT >= 4 &&
!(rd_reg32(&topregs->ctrl.r4tst[0].rdsta) & RDSTA_IF0)) {
kick_trng(pdev);
ret = instantiate_rng(ctrlpriv->jrdev[0]);
if (ret) {

View File

@ -231,7 +231,12 @@ struct sec4_sg_entry {
#define LDST_SRCDST_WORD_PKHA_B_SZ (0x11 << LDST_SRCDST_SHIFT)
#define LDST_SRCDST_WORD_PKHA_N_SZ (0x12 << LDST_SRCDST_SHIFT)
#define LDST_SRCDST_WORD_PKHA_E_SZ (0x13 << LDST_SRCDST_SHIFT)
#define LDST_SRCDST_WORD_CLASS_CTX (0x20 << LDST_SRCDST_SHIFT)
#define LDST_SRCDST_WORD_DESCBUF (0x40 << LDST_SRCDST_SHIFT)
#define LDST_SRCDST_WORD_DESCBUF_JOB (0x41 << LDST_SRCDST_SHIFT)
#define LDST_SRCDST_WORD_DESCBUF_SHARED (0x42 << LDST_SRCDST_SHIFT)
#define LDST_SRCDST_WORD_DESCBUF_JOB_WE (0x45 << LDST_SRCDST_SHIFT)
#define LDST_SRCDST_WORD_DESCBUF_SHARED_WE (0x46 << LDST_SRCDST_SHIFT)
#define LDST_SRCDST_WORD_INFO_FIFO (0x7a << LDST_SRCDST_SHIFT)
/* Offset in source/destination */
@ -366,6 +371,7 @@ struct sec4_sg_entry {
#define FIFOLD_TYPE_LAST2FLUSH1 (0x05 << FIFOLD_TYPE_SHIFT)
#define FIFOLD_TYPE_LASTBOTH (0x06 << FIFOLD_TYPE_SHIFT)
#define FIFOLD_TYPE_LASTBOTHFL (0x07 << FIFOLD_TYPE_SHIFT)
#define FIFOLD_TYPE_NOINFOFIFO (0x0F << FIFOLD_TYPE_SHIFT)
#define FIFOLDST_LEN_MASK 0xffff
#define FIFOLDST_EXT_LEN_MASK 0xffffffff
@ -1294,10 +1300,10 @@ struct sec4_sg_entry {
#define SQOUT_SGF 0x01000000
/* Appends to a previous pointer */
#define SQOUT_PRE 0x00800000
#define SQOUT_PRE SQIN_PRE
/* Restore sequence with pointer/length */
#define SQOUT_RTO 0x00200000
#define SQOUT_RTO SQIN_RTO
/* Use extended length following pointer */
#define SQOUT_EXT 0x00400000
@ -1359,6 +1365,7 @@ struct sec4_sg_entry {
#define MOVE_DEST_MATH3 (0x07 << MOVE_DEST_SHIFT)
#define MOVE_DEST_CLASS1INFIFO (0x08 << MOVE_DEST_SHIFT)
#define MOVE_DEST_CLASS2INFIFO (0x09 << MOVE_DEST_SHIFT)
#define MOVE_DEST_INFIFO_NOINFO (0x0a << MOVE_DEST_SHIFT)
#define MOVE_DEST_PK_A (0x0c << MOVE_DEST_SHIFT)
#define MOVE_DEST_CLASS1KEY (0x0d << MOVE_DEST_SHIFT)
#define MOVE_DEST_CLASS2KEY (0x0e << MOVE_DEST_SHIFT)
@ -1411,6 +1418,7 @@ struct sec4_sg_entry {
#define MATH_SRC0_REG2 (0x02 << MATH_SRC0_SHIFT)
#define MATH_SRC0_REG3 (0x03 << MATH_SRC0_SHIFT)
#define MATH_SRC0_IMM (0x04 << MATH_SRC0_SHIFT)
#define MATH_SRC0_DPOVRD (0x07 << MATH_SRC0_SHIFT)
#define MATH_SRC0_SEQINLEN (0x08 << MATH_SRC0_SHIFT)
#define MATH_SRC0_SEQOUTLEN (0x09 << MATH_SRC0_SHIFT)
#define MATH_SRC0_VARSEQINLEN (0x0a << MATH_SRC0_SHIFT)
@ -1425,6 +1433,7 @@ struct sec4_sg_entry {
#define MATH_SRC1_REG2 (0x02 << MATH_SRC1_SHIFT)
#define MATH_SRC1_REG3 (0x03 << MATH_SRC1_SHIFT)
#define MATH_SRC1_IMM (0x04 << MATH_SRC1_SHIFT)
#define MATH_SRC1_DPOVRD (0x07 << MATH_SRC0_SHIFT)
#define MATH_SRC1_INFIFO (0x0a << MATH_SRC1_SHIFT)
#define MATH_SRC1_OUTFIFO (0x0b << MATH_SRC1_SHIFT)
#define MATH_SRC1_ONE (0x0c << MATH_SRC1_SHIFT)
@ -1600,4 +1609,13 @@ struct sec4_sg_entry {
#define NFIFOENTRY_PLEN_SHIFT 0
#define NFIFOENTRY_PLEN_MASK (0xFF << NFIFOENTRY_PLEN_SHIFT)
/* Append Load Immediate Command */
#define FD_CMD_APPEND_LOAD_IMMEDIATE 0x80000000
/* Set SEQ LIODN equal to the Non-SEQ LIODN for the job */
#define FD_CMD_SET_SEQ_LIODN_EQUAL_NONSEQ_LIODN 0x40000000
/* Frame Descriptor Command for Replacement Job Descriptor */
#define FD_CMD_REPLACE_JOB_DESC 0x20000000
#endif /* DESC_H */

View File

@ -110,6 +110,26 @@ static inline void append_cmd(u32 *desc, u32 command)
(*desc)++;
}
#define append_u32 append_cmd
static inline void append_u64(u32 *desc, u64 data)
{
u32 *offset = desc_end(desc);
*offset = upper_32_bits(data);
*(++offset) = lower_32_bits(data);
(*desc) += 2;
}
/* Write command without affecting header, and return pointer to next word */
static inline u32 *write_cmd(u32 *desc, u32 command)
{
*desc = command;
return desc + 1;
}
static inline void append_cmd_ptr(u32 *desc, dma_addr_t ptr, int len,
u32 command)
{
@ -122,7 +142,8 @@ static inline void append_cmd_ptr_extlen(u32 *desc, dma_addr_t ptr,
unsigned int len, u32 command)
{
append_cmd(desc, command);
append_ptr(desc, ptr);
if (!(command & (SQIN_RTO | SQIN_PRE)))
append_ptr(desc, ptr);
append_cmd(desc, len);
}
@ -176,17 +197,36 @@ static inline void append_##cmd(u32 *desc, dma_addr_t ptr, unsigned int len, \
}
APPEND_CMD_PTR(key, KEY)
APPEND_CMD_PTR(load, LOAD)
APPEND_CMD_PTR(store, STORE)
APPEND_CMD_PTR(fifo_load, FIFO_LOAD)
APPEND_CMD_PTR(fifo_store, FIFO_STORE)
static inline void append_store(u32 *desc, dma_addr_t ptr, unsigned int len,
u32 options)
{
u32 cmd_src;
cmd_src = options & LDST_SRCDST_MASK;
append_cmd(desc, CMD_STORE | options | len);
/* The following options do not require pointer */
if (!(cmd_src == LDST_SRCDST_WORD_DESCBUF_SHARED ||
cmd_src == LDST_SRCDST_WORD_DESCBUF_JOB ||
cmd_src == LDST_SRCDST_WORD_DESCBUF_JOB_WE ||
cmd_src == LDST_SRCDST_WORD_DESCBUF_SHARED_WE))
append_ptr(desc, ptr);
}
#define APPEND_SEQ_PTR_INTLEN(cmd, op) \
static inline void append_seq_##cmd##_ptr_intlen(u32 *desc, dma_addr_t ptr, \
unsigned int len, \
u32 options) \
{ \
PRINT_POS; \
append_cmd_ptr(desc, ptr, len, CMD_SEQ_##op##_PTR | options); \
if (options & (SQIN_RTO | SQIN_PRE)) \
append_cmd(desc, CMD_SEQ_##op##_PTR | len | options); \
else \
append_cmd_ptr(desc, ptr, len, CMD_SEQ_##op##_PTR | options); \
}
APPEND_SEQ_PTR_INTLEN(in, IN)
APPEND_SEQ_PTR_INTLEN(out, OUT)
@ -259,7 +299,7 @@ APPEND_CMD_RAW_IMM(load, LOAD, u32);
*/
#define APPEND_MATH(op, desc, dest, src_0, src_1, len) \
append_cmd(desc, CMD_MATH | MATH_FUN_##op | MATH_DEST_##dest | \
MATH_SRC0_##src_0 | MATH_SRC1_##src_1 | (u32) (len & MATH_LEN_MASK));
MATH_SRC0_##src_0 | MATH_SRC1_##src_1 | (u32)len);
#define append_math_add(desc, dest, src0, src1, len) \
APPEND_MATH(ADD, desc, dest, src0, src1, len)
@ -279,6 +319,8 @@ append_cmd(desc, CMD_MATH | MATH_FUN_##op | MATH_DEST_##dest | \
APPEND_MATH(LSHIFT, desc, dest, src0, src1, len)
#define append_math_rshift(desc, dest, src0, src1, len) \
APPEND_MATH(RSHIFT, desc, dest, src0, src1, len)
#define append_math_ldshift(desc, dest, src0, src1, len) \
APPEND_MATH(SHLD, desc, dest, src0, src1, len)
/* Exactly one source is IMM. Data is passed in as u32 value */
#define APPEND_MATH_IMM_u32(op, desc, dest, src_0, src_1, data) \
@ -305,3 +347,34 @@ do { \
APPEND_MATH_IMM_u32(LSHIFT, desc, dest, src0, src1, data)
#define append_math_rshift_imm_u32(desc, dest, src0, src1, data) \
APPEND_MATH_IMM_u32(RSHIFT, desc, dest, src0, src1, data)
/* Exactly one source is IMM. Data is passed in as u64 value */
#define APPEND_MATH_IMM_u64(op, desc, dest, src_0, src_1, data) \
do { \
u32 upper = (data >> 16) >> 16; \
APPEND_MATH(op, desc, dest, src_0, src_1, CAAM_CMD_SZ * 2 | \
(upper ? 0 : MATH_IFB)); \
if (upper) \
append_u64(desc, data); \
else \
append_u32(desc, data); \
} while (0)
#define append_math_add_imm_u64(desc, dest, src0, src1, data) \
APPEND_MATH_IMM_u64(ADD, desc, dest, src0, src1, data)
#define append_math_sub_imm_u64(desc, dest, src0, src1, data) \
APPEND_MATH_IMM_u64(SUB, desc, dest, src0, src1, data)
#define append_math_add_c_imm_u64(desc, dest, src0, src1, data) \
APPEND_MATH_IMM_u64(ADDC, desc, dest, src0, src1, data)
#define append_math_sub_b_imm_u64(desc, dest, src0, src1, data) \
APPEND_MATH_IMM_u64(SUBB, desc, dest, src0, src1, data)
#define append_math_and_imm_u64(desc, dest, src0, src1, data) \
APPEND_MATH_IMM_u64(AND, desc, dest, src0, src1, data)
#define append_math_or_imm_u64(desc, dest, src0, src1, data) \
APPEND_MATH_IMM_u64(OR, desc, dest, src0, src1, data)
#define append_math_xor_imm_u64(desc, dest, src0, src1, data) \
APPEND_MATH_IMM_u64(XOR, desc, dest, src0, src1, data)
#define append_math_lshift_imm_u64(desc, dest, src0, src1, data) \
APPEND_MATH_IMM_u64(LSHIFT, desc, dest, src0, src1, data)
#define append_math_rshift_imm_u64(desc, dest, src0, src1, data) \
APPEND_MATH_IMM_u64(RSHIFT, desc, dest, src0, src1, data)

View File

@ -44,6 +44,7 @@
#define PDBOPTS_ESP_IPHDRSRC 0x08 /* IP header comes from PDB (encap) */
#define PDBOPTS_ESP_INCIPHDR 0x04 /* Prepend IP header to output frame */
#define PDBOPTS_ESP_IPVSN 0x02 /* process IPv6 header */
#define PDBOPTS_ESP_AOFL 0x04 /* adjust out frame len (decap, SEC>=5.3)*/
#define PDBOPTS_ESP_TUNNEL 0x01 /* tunnel mode next-header byte */
#define PDBOPTS_ESP_IPV6 0x02 /* ip header version is V6 */
#define PDBOPTS_ESP_DIFFSERV 0x40 /* copy TOS/TC from inner iphdr */

View File

@ -117,6 +117,43 @@ struct jr_outentry {
#define CHA_NUM_DECONUM_SHIFT 56
#define CHA_NUM_DECONUM_MASK (0xfull << CHA_NUM_DECONUM_SHIFT)
/* CHA Version IDs */
#define CHA_ID_AES_SHIFT 0
#define CHA_ID_AES_MASK (0xfull << CHA_ID_AES_SHIFT)
#define CHA_ID_DES_SHIFT 4
#define CHA_ID_DES_MASK (0xfull << CHA_ID_DES_SHIFT)
#define CHA_ID_ARC4_SHIFT 8
#define CHA_ID_ARC4_MASK (0xfull << CHA_ID_ARC4_SHIFT)
#define CHA_ID_MD_SHIFT 12
#define CHA_ID_MD_MASK (0xfull << CHA_ID_MD_SHIFT)
#define CHA_ID_RNG_SHIFT 16
#define CHA_ID_RNG_MASK (0xfull << CHA_ID_RNG_SHIFT)
#define CHA_ID_SNW8_SHIFT 20
#define CHA_ID_SNW8_MASK (0xfull << CHA_ID_SNW8_SHIFT)
#define CHA_ID_KAS_SHIFT 24
#define CHA_ID_KAS_MASK (0xfull << CHA_ID_KAS_SHIFT)
#define CHA_ID_PK_SHIFT 28
#define CHA_ID_PK_MASK (0xfull << CHA_ID_PK_SHIFT)
#define CHA_ID_CRC_SHIFT 32
#define CHA_ID_CRC_MASK (0xfull << CHA_ID_CRC_SHIFT)
#define CHA_ID_SNW9_SHIFT 36
#define CHA_ID_SNW9_MASK (0xfull << CHA_ID_SNW9_SHIFT)
#define CHA_ID_DECO_SHIFT 56
#define CHA_ID_DECO_MASK (0xfull << CHA_ID_DECO_SHIFT)
#define CHA_ID_JR_SHIFT 60
#define CHA_ID_JR_MASK (0xfull << CHA_ID_JR_SHIFT)
struct sec_vid {
u16 ip_id;
u8 maj_rev;
@ -228,7 +265,10 @@ struct rng4tst {
u32 rtfrqmax; /* PRGM=1: freq. count max. limit register */
u32 rtfrqcnt; /* PRGM=0: freq. count register */
};
u32 rsvd1[56];
u32 rsvd1[40];
#define RDSTA_IF0 0x00000001
u32 rdsta;
u32 rsvd2[15];
};
/*

912
drivers/crypto/dcp.c Normal file
View File

@ -0,0 +1,912 @@
/*
* Cryptographic API.
*
* Support for DCP cryptographic accelerator.
*
* Copyright (c) 2013
* Author: Tobias Rauter <tobias.rauter@gmail.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as published
* by the Free Software Foundation.
*
* Based on tegra-aes.c, dcp.c (from freescale SDK) and sahara.c
*/
#include <linux/module.h>
#include <linux/init.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/platform_device.h>
#include <linux/dma-mapping.h>
#include <linux/io.h>
#include <linux/mutex.h>
#include <linux/interrupt.h>
#include <linux/completion.h>
#include <linux/workqueue.h>
#include <linux/delay.h>
#include <linux/crypto.h>
#include <linux/miscdevice.h>
#include <crypto/scatterwalk.h>
#include <crypto/aes.h>
/* IOCTL for DCP OTP Key AES - taken from Freescale's SDK*/
#define DBS_IOCTL_BASE 'd'
#define DBS_ENC _IOW(DBS_IOCTL_BASE, 0x00, uint8_t[16])
#define DBS_DEC _IOW(DBS_IOCTL_BASE, 0x01, uint8_t[16])
/* DCP channel used for AES */
#define USED_CHANNEL 1
/* Ring Buffers' maximum size */
#define DCP_MAX_PKG 20
/* Control Register */
#define DCP_REG_CTRL 0x000
#define DCP_CTRL_SFRST (1<<31)
#define DCP_CTRL_CLKGATE (1<<30)
#define DCP_CTRL_CRYPTO_PRESENT (1<<29)
#define DCP_CTRL_SHA_PRESENT (1<<28)
#define DCP_CTRL_GATHER_RES_WRITE (1<<23)
#define DCP_CTRL_ENABLE_CONTEXT_CACHE (1<<22)
#define DCP_CTRL_ENABLE_CONTEXT_SWITCH (1<<21)
#define DCP_CTRL_CH_IRQ_E_0 0x01
#define DCP_CTRL_CH_IRQ_E_1 0x02
#define DCP_CTRL_CH_IRQ_E_2 0x04
#define DCP_CTRL_CH_IRQ_E_3 0x08
/* Status register */
#define DCP_REG_STAT 0x010
#define DCP_STAT_OTP_KEY_READY (1<<28)
#define DCP_STAT_CUR_CHANNEL(stat) ((stat>>24)&0x0F)
#define DCP_STAT_READY_CHANNEL(stat) ((stat>>16)&0x0F)
#define DCP_STAT_IRQ(stat) (stat&0x0F)
#define DCP_STAT_CHAN_0 (0x01)
#define DCP_STAT_CHAN_1 (0x02)
#define DCP_STAT_CHAN_2 (0x04)
#define DCP_STAT_CHAN_3 (0x08)
/* Channel Control Register */
#define DCP_REG_CHAN_CTRL 0x020
#define DCP_CHAN_CTRL_CH0_IRQ_MERGED (1<<16)
#define DCP_CHAN_CTRL_HIGH_PRIO_0 (0x0100)
#define DCP_CHAN_CTRL_HIGH_PRIO_1 (0x0200)
#define DCP_CHAN_CTRL_HIGH_PRIO_2 (0x0400)
#define DCP_CHAN_CTRL_HIGH_PRIO_3 (0x0800)
#define DCP_CHAN_CTRL_ENABLE_0 (0x01)
#define DCP_CHAN_CTRL_ENABLE_1 (0x02)
#define DCP_CHAN_CTRL_ENABLE_2 (0x04)
#define DCP_CHAN_CTRL_ENABLE_3 (0x08)
/*
* Channel Registers:
* The DCP has 4 channels. Each of this channels
* has 4 registers (command pointer, semaphore, status and options).
* The address of register REG of channel CHAN is obtained by
* dcp_chan_reg(REG, CHAN)
*/
#define DCP_REG_CHAN_PTR 0x00000100
#define DCP_REG_CHAN_SEMA 0x00000110
#define DCP_REG_CHAN_STAT 0x00000120
#define DCP_REG_CHAN_OPT 0x00000130
#define DCP_CHAN_STAT_NEXT_CHAIN_IS_0 0x010000
#define DCP_CHAN_STAT_NO_CHAIN 0x020000
#define DCP_CHAN_STAT_CONTEXT_ERROR 0x030000
#define DCP_CHAN_STAT_PAYLOAD_ERROR 0x040000
#define DCP_CHAN_STAT_INVALID_MODE 0x050000
#define DCP_CHAN_STAT_PAGEFAULT 0x40
#define DCP_CHAN_STAT_DST 0x20
#define DCP_CHAN_STAT_SRC 0x10
#define DCP_CHAN_STAT_PACKET 0x08
#define DCP_CHAN_STAT_SETUP 0x04
#define DCP_CHAN_STAT_MISMATCH 0x02
/* hw packet control*/
#define DCP_PKT_PAYLOAD_KEY (1<<11)
#define DCP_PKT_OTP_KEY (1<<10)
#define DCP_PKT_CIPHER_INIT (1<<9)
#define DCP_PKG_CIPHER_ENCRYPT (1<<8)
#define DCP_PKT_CIPHER_ENABLE (1<<5)
#define DCP_PKT_DECR_SEM (1<<1)
#define DCP_PKT_CHAIN (1<<2)
#define DCP_PKT_IRQ 1
#define DCP_PKT_MODE_CBC (1<<4)
#define DCP_PKT_KEYSELECT_OTP (0xFF<<8)
/* cipher flags */
#define DCP_ENC 0x0001
#define DCP_DEC 0x0002
#define DCP_ECB 0x0004
#define DCP_CBC 0x0008
#define DCP_CBC_INIT 0x0010
#define DCP_NEW_KEY 0x0040
#define DCP_OTP_KEY 0x0080
#define DCP_AES 0x1000
/* DCP Flags */
#define DCP_FLAG_BUSY 0x01
#define DCP_FLAG_PRODUCING 0x02
/* clock defines */
#define CLOCK_ON 1
#define CLOCK_OFF 0
struct dcp_dev_req_ctx {
int mode;
};
struct dcp_op {
unsigned int flags;
u8 key[AES_KEYSIZE_128];
int keylen;
struct ablkcipher_request *req;
struct crypto_ablkcipher *fallback;
uint32_t stat;
uint32_t pkt1;
uint32_t pkt2;
struct ablkcipher_walk walk;
};
struct dcp_dev {
struct device *dev;
void __iomem *dcp_regs_base;
int dcp_vmi_irq;
int dcp_irq;
spinlock_t queue_lock;
struct crypto_queue queue;
uint32_t pkt_produced;
uint32_t pkt_consumed;
struct dcp_hw_packet *hw_pkg[DCP_MAX_PKG];
dma_addr_t hw_phys_pkg;
/* [KEY][IV] Both with 16 Bytes */
u8 *payload_base;
dma_addr_t payload_base_dma;
struct tasklet_struct done_task;
struct tasklet_struct queue_task;
struct timer_list watchdog;
unsigned long flags;
struct dcp_op *ctx;
struct miscdevice dcp_bootstream_misc;
};
struct dcp_hw_packet {
uint32_t next;
uint32_t pkt1;
uint32_t pkt2;
uint32_t src;
uint32_t dst;
uint32_t size;
uint32_t payload;
uint32_t stat;
};
static struct dcp_dev *global_dev;
static inline u32 dcp_chan_reg(u32 reg, int chan)
{
return reg + (chan) * 0x40;
}
static inline void dcp_write(struct dcp_dev *dev, u32 data, u32 reg)
{
writel(data, dev->dcp_regs_base + reg);
}
static inline void dcp_set(struct dcp_dev *dev, u32 data, u32 reg)
{
writel(data, dev->dcp_regs_base + (reg | 0x04));
}
static inline void dcp_clear(struct dcp_dev *dev, u32 data, u32 reg)
{
writel(data, dev->dcp_regs_base + (reg | 0x08));
}
static inline void dcp_toggle(struct dcp_dev *dev, u32 data, u32 reg)
{
writel(data, dev->dcp_regs_base + (reg | 0x0C));
}
static inline unsigned int dcp_read(struct dcp_dev *dev, u32 reg)
{
return readl(dev->dcp_regs_base + reg);
}
static void dcp_dma_unmap(struct dcp_dev *dev, struct dcp_hw_packet *pkt)
{
dma_unmap_page(dev->dev, pkt->src, pkt->size, DMA_TO_DEVICE);
dma_unmap_page(dev->dev, pkt->dst, pkt->size, DMA_FROM_DEVICE);
dev_dbg(dev->dev, "unmap packet %x", (unsigned int) pkt);
}
static int dcp_dma_map(struct dcp_dev *dev,
struct ablkcipher_walk *walk, struct dcp_hw_packet *pkt)
{
dev_dbg(dev->dev, "map packet %x", (unsigned int) pkt);
/* align to length = 16 */
pkt->size = walk->nbytes - (walk->nbytes % 16);
pkt->src = dma_map_page(dev->dev, walk->src.page, walk->src.offset,
pkt->size, DMA_TO_DEVICE);
if (pkt->src == 0) {
dev_err(dev->dev, "Unable to map src");
return -ENOMEM;
}
pkt->dst = dma_map_page(dev->dev, walk->dst.page, walk->dst.offset,
pkt->size, DMA_FROM_DEVICE);
if (pkt->dst == 0) {
dev_err(dev->dev, "Unable to map dst");
dma_unmap_page(dev->dev, pkt->src, pkt->size, DMA_TO_DEVICE);
return -ENOMEM;
}
return 0;
}
static void dcp_op_one(struct dcp_dev *dev, struct dcp_hw_packet *pkt,
uint8_t last)
{
struct dcp_op *ctx = dev->ctx;
pkt->pkt1 = ctx->pkt1;
pkt->pkt2 = ctx->pkt2;
pkt->payload = (u32) dev->payload_base_dma;
pkt->stat = 0;
if (ctx->flags & DCP_CBC_INIT) {
pkt->pkt1 |= DCP_PKT_CIPHER_INIT;
ctx->flags &= ~DCP_CBC_INIT;
}
mod_timer(&dev->watchdog, jiffies + msecs_to_jiffies(500));
pkt->pkt1 |= DCP_PKT_IRQ;
if (!last)
pkt->pkt1 |= DCP_PKT_CHAIN;
dev->pkt_produced++;
dcp_write(dev, 1,
dcp_chan_reg(DCP_REG_CHAN_SEMA, USED_CHANNEL));
}
static void dcp_op_proceed(struct dcp_dev *dev)
{
struct dcp_op *ctx = dev->ctx;
struct dcp_hw_packet *pkt;
while (ctx->walk.nbytes) {
int err = 0;
pkt = dev->hw_pkg[dev->pkt_produced % DCP_MAX_PKG];
err = dcp_dma_map(dev, &ctx->walk, pkt);
if (err) {
dev->ctx->stat |= err;
/* start timer to wait for already set up calls */
mod_timer(&dev->watchdog,
jiffies + msecs_to_jiffies(500));
break;
}
err = ctx->walk.nbytes - pkt->size;
ablkcipher_walk_done(dev->ctx->req, &dev->ctx->walk, err);
dcp_op_one(dev, pkt, ctx->walk.nbytes == 0);
/* we have to wait if no space is left in buffer */
if (dev->pkt_produced - dev->pkt_consumed == DCP_MAX_PKG)
break;
}
clear_bit(DCP_FLAG_PRODUCING, &dev->flags);
}
static void dcp_op_start(struct dcp_dev *dev, uint8_t use_walk)
{
struct dcp_op *ctx = dev->ctx;
if (ctx->flags & DCP_NEW_KEY) {
memcpy(dev->payload_base, ctx->key, ctx->keylen);
ctx->flags &= ~DCP_NEW_KEY;
}
ctx->pkt1 = 0;
ctx->pkt1 |= DCP_PKT_CIPHER_ENABLE;
ctx->pkt1 |= DCP_PKT_DECR_SEM;
if (ctx->flags & DCP_OTP_KEY)
ctx->pkt1 |= DCP_PKT_OTP_KEY;
else
ctx->pkt1 |= DCP_PKT_PAYLOAD_KEY;
if (ctx->flags & DCP_ENC)
ctx->pkt1 |= DCP_PKG_CIPHER_ENCRYPT;
ctx->pkt2 = 0;
if (ctx->flags & DCP_CBC)
ctx->pkt2 |= DCP_PKT_MODE_CBC;
dev->pkt_produced = 0;
dev->pkt_consumed = 0;
ctx->stat = 0;
dcp_clear(dev, -1, dcp_chan_reg(DCP_REG_CHAN_STAT, USED_CHANNEL));
dcp_write(dev, (u32) dev->hw_phys_pkg,
dcp_chan_reg(DCP_REG_CHAN_PTR, USED_CHANNEL));
set_bit(DCP_FLAG_PRODUCING, &dev->flags);
if (use_walk) {
ablkcipher_walk_init(&ctx->walk, ctx->req->dst,
ctx->req->src, ctx->req->nbytes);
ablkcipher_walk_phys(ctx->req, &ctx->walk);
dcp_op_proceed(dev);
} else {
dcp_op_one(dev, dev->hw_pkg[0], 1);
clear_bit(DCP_FLAG_PRODUCING, &dev->flags);
}
}
static void dcp_done_task(unsigned long data)
{
struct dcp_dev *dev = (struct dcp_dev *)data;
struct dcp_hw_packet *last_packet;
int fin;
fin = 0;
for (last_packet = dev->hw_pkg[(dev->pkt_consumed) % DCP_MAX_PKG];
last_packet->stat == 1;
last_packet =
dev->hw_pkg[++(dev->pkt_consumed) % DCP_MAX_PKG]) {
dcp_dma_unmap(dev, last_packet);
last_packet->stat = 0;
fin++;
}
/* the last call of this function already consumed this IRQ's packet */
if (fin == 0)
return;
dev_dbg(dev->dev,
"Packet(s) done with status %x; finished: %d, produced:%d, complete consumed: %d",
dev->ctx->stat, fin, dev->pkt_produced, dev->pkt_consumed);
last_packet = dev->hw_pkg[(dev->pkt_consumed - 1) % DCP_MAX_PKG];
if (!dev->ctx->stat && last_packet->pkt1 & DCP_PKT_CHAIN) {
if (!test_and_set_bit(DCP_FLAG_PRODUCING, &dev->flags))
dcp_op_proceed(dev);
return;
}
while (unlikely(dev->pkt_consumed < dev->pkt_produced)) {
dcp_dma_unmap(dev,
dev->hw_pkg[dev->pkt_consumed++ % DCP_MAX_PKG]);
}
if (dev->ctx->flags & DCP_OTP_KEY) {
/* we used the miscdevice, no walk to finish */
clear_bit(DCP_FLAG_BUSY, &dev->flags);
return;
}
ablkcipher_walk_complete(&dev->ctx->walk);
dev->ctx->req->base.complete(&dev->ctx->req->base,
dev->ctx->stat);
dev->ctx->req = NULL;
/* in case there are other requests in the queue */
tasklet_schedule(&dev->queue_task);
}
static void dcp_watchdog(unsigned long data)
{
struct dcp_dev *dev = (struct dcp_dev *)data;
dev->ctx->stat |= dcp_read(dev,
dcp_chan_reg(DCP_REG_CHAN_STAT, USED_CHANNEL));
dev_err(dev->dev, "Timeout, Channel status: %x", dev->ctx->stat);
if (!dev->ctx->stat)
dev->ctx->stat = -ETIMEDOUT;
dcp_done_task(data);
}
static irqreturn_t dcp_common_irq(int irq, void *context)
{
u32 msk;
struct dcp_dev *dev = (struct dcp_dev *) context;
del_timer(&dev->watchdog);
msk = DCP_STAT_IRQ(dcp_read(dev, DCP_REG_STAT));
dcp_clear(dev, msk, DCP_REG_STAT);
if (msk == 0)
return IRQ_NONE;
dev->ctx->stat |= dcp_read(dev,
dcp_chan_reg(DCP_REG_CHAN_STAT, USED_CHANNEL));
if (msk & DCP_STAT_CHAN_1)
tasklet_schedule(&dev->done_task);
return IRQ_HANDLED;
}
static irqreturn_t dcp_vmi_irq(int irq, void *context)
{
return dcp_common_irq(irq, context);
}
static irqreturn_t dcp_irq(int irq, void *context)
{
return dcp_common_irq(irq, context);
}
static void dcp_crypt(struct dcp_dev *dev, struct dcp_op *ctx)
{
dev->ctx = ctx;
if ((ctx->flags & DCP_CBC) && ctx->req->info) {
ctx->flags |= DCP_CBC_INIT;
memcpy(dev->payload_base + AES_KEYSIZE_128,
ctx->req->info, AES_KEYSIZE_128);
}
dcp_op_start(dev, 1);
}
static void dcp_queue_task(unsigned long data)
{
struct dcp_dev *dev = (struct dcp_dev *) data;
struct crypto_async_request *async_req, *backlog;
struct crypto_ablkcipher *tfm;
struct dcp_op *ctx;
struct dcp_dev_req_ctx *rctx;
struct ablkcipher_request *req;
unsigned long flags;
spin_lock_irqsave(&dev->queue_lock, flags);
backlog = crypto_get_backlog(&dev->queue);
async_req = crypto_dequeue_request(&dev->queue);
spin_unlock_irqrestore(&dev->queue_lock, flags);
if (!async_req)
goto ret_nothing_done;
if (backlog)
backlog->complete(backlog, -EINPROGRESS);
req = ablkcipher_request_cast(async_req);
tfm = crypto_ablkcipher_reqtfm(req);
rctx = ablkcipher_request_ctx(req);
ctx = crypto_ablkcipher_ctx(tfm);
if (!req->src || !req->dst)
goto ret_nothing_done;
ctx->flags |= rctx->mode;
ctx->req = req;
dcp_crypt(dev, ctx);
return;
ret_nothing_done:
clear_bit(DCP_FLAG_BUSY, &dev->flags);
}
static int dcp_cra_init(struct crypto_tfm *tfm)
{
const char *name = tfm->__crt_alg->cra_name;
struct dcp_op *ctx = crypto_tfm_ctx(tfm);
tfm->crt_ablkcipher.reqsize = sizeof(struct dcp_dev_req_ctx);
ctx->fallback = crypto_alloc_ablkcipher(name, 0,
CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK);
if (IS_ERR(ctx->fallback)) {
dev_err(global_dev->dev, "Error allocating fallback algo %s\n",
name);
return PTR_ERR(ctx->fallback);
}
return 0;
}
static void dcp_cra_exit(struct crypto_tfm *tfm)
{
struct dcp_op *ctx = crypto_tfm_ctx(tfm);
if (ctx->fallback)
crypto_free_ablkcipher(ctx->fallback);
ctx->fallback = NULL;
}
/* async interface */
static int dcp_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key,
unsigned int len)
{
struct dcp_op *ctx = crypto_ablkcipher_ctx(tfm);
unsigned int ret = 0;
ctx->keylen = len;
ctx->flags = 0;
if (len == AES_KEYSIZE_128) {
if (memcmp(ctx->key, key, AES_KEYSIZE_128)) {
memcpy(ctx->key, key, len);
ctx->flags |= DCP_NEW_KEY;
}
return 0;
}
ctx->fallback->base.crt_flags &= ~CRYPTO_TFM_REQ_MASK;
ctx->fallback->base.crt_flags |=
(tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
ret = crypto_ablkcipher_setkey(ctx->fallback, key, len);
if (ret) {
struct crypto_tfm *tfm_aux = crypto_ablkcipher_tfm(tfm);
tfm_aux->crt_flags &= ~CRYPTO_TFM_RES_MASK;
tfm_aux->crt_flags |=
(ctx->fallback->base.crt_flags & CRYPTO_TFM_RES_MASK);
}
return ret;
}
static int dcp_aes_cbc_crypt(struct ablkcipher_request *req, int mode)
{
struct dcp_dev_req_ctx *rctx = ablkcipher_request_ctx(req);
struct dcp_dev *dev = global_dev;
unsigned long flags;
int err = 0;
if (!IS_ALIGNED(req->nbytes, AES_BLOCK_SIZE))
return -EINVAL;
rctx->mode = mode;
spin_lock_irqsave(&dev->queue_lock, flags);
err = ablkcipher_enqueue_request(&dev->queue, req);
spin_unlock_irqrestore(&dev->queue_lock, flags);
flags = test_and_set_bit(DCP_FLAG_BUSY, &dev->flags);
if (!(flags & DCP_FLAG_BUSY))
tasklet_schedule(&dev->queue_task);
return err;
}
static int dcp_aes_cbc_encrypt(struct ablkcipher_request *req)
{
struct crypto_tfm *tfm =
crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req));
struct dcp_op *ctx = crypto_ablkcipher_ctx(
crypto_ablkcipher_reqtfm(req));
if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
int err = 0;
ablkcipher_request_set_tfm(req, ctx->fallback);
err = crypto_ablkcipher_encrypt(req);
ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm));
return err;
}
return dcp_aes_cbc_crypt(req, DCP_AES | DCP_ENC | DCP_CBC);
}
static int dcp_aes_cbc_decrypt(struct ablkcipher_request *req)
{
struct crypto_tfm *tfm =
crypto_ablkcipher_tfm(crypto_ablkcipher_reqtfm(req));
struct dcp_op *ctx = crypto_ablkcipher_ctx(
crypto_ablkcipher_reqtfm(req));
if (unlikely(ctx->keylen != AES_KEYSIZE_128)) {
int err = 0;
ablkcipher_request_set_tfm(req, ctx->fallback);
err = crypto_ablkcipher_decrypt(req);
ablkcipher_request_set_tfm(req, __crypto_ablkcipher_cast(tfm));
return err;
}
return dcp_aes_cbc_crypt(req, DCP_AES | DCP_DEC | DCP_CBC);
}
static struct crypto_alg algs[] = {
{
.cra_name = "cbc(aes)",
.cra_driver_name = "dcp-cbc-aes",
.cra_alignmask = 3,
.cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC |
CRYPTO_ALG_NEED_FALLBACK,
.cra_blocksize = AES_KEYSIZE_128,
.cra_type = &crypto_ablkcipher_type,
.cra_priority = 300,
.cra_u.ablkcipher = {
.min_keysize = AES_KEYSIZE_128,
.max_keysize = AES_KEYSIZE_128,
.setkey = dcp_aes_setkey,
.encrypt = dcp_aes_cbc_encrypt,
.decrypt = dcp_aes_cbc_decrypt,
.ivsize = AES_KEYSIZE_128,
}
},
};
/* DCP bootstream verification interface: uses OTP key for crypto */
static int dcp_bootstream_open(struct inode *inode, struct file *file)
{
file->private_data = container_of((file->private_data),
struct dcp_dev, dcp_bootstream_misc);
return 0;
}
static long dcp_bootstream_ioctl(struct file *file,
unsigned int cmd, unsigned long arg)
{
struct dcp_dev *dev = (struct dcp_dev *) file->private_data;
void __user *argp = (void __user *)arg;
int ret;
if (dev == NULL)
return -EBADF;
if (cmd != DBS_ENC && cmd != DBS_DEC)
return -EINVAL;
if (copy_from_user(dev->payload_base, argp, 16))
return -EFAULT;
if (test_and_set_bit(DCP_FLAG_BUSY, &dev->flags))
return -EAGAIN;
dev->ctx = kzalloc(sizeof(struct dcp_op), GFP_KERNEL);
if (!dev->ctx) {
dev_err(dev->dev,
"cannot allocate context for OTP crypto");
clear_bit(DCP_FLAG_BUSY, &dev->flags);
return -ENOMEM;
}
dev->ctx->flags = DCP_AES | DCP_ECB | DCP_OTP_KEY | DCP_CBC_INIT;
dev->ctx->flags |= (cmd == DBS_ENC) ? DCP_ENC : DCP_DEC;
dev->hw_pkg[0]->src = dev->payload_base_dma;
dev->hw_pkg[0]->dst = dev->payload_base_dma;
dev->hw_pkg[0]->size = 16;
dcp_op_start(dev, 0);
while (test_bit(DCP_FLAG_BUSY, &dev->flags))
cpu_relax();
ret = dev->ctx->stat;
if (!ret && copy_to_user(argp, dev->payload_base, 16))
ret = -EFAULT;
kfree(dev->ctx);
return ret;
}
static const struct file_operations dcp_bootstream_fops = {
.owner = THIS_MODULE,
.unlocked_ioctl = dcp_bootstream_ioctl,
.open = dcp_bootstream_open,
};
static int dcp_probe(struct platform_device *pdev)
{
struct dcp_dev *dev = NULL;
struct resource *r;
int i, ret, j;
dev = devm_kzalloc(&pdev->dev, sizeof(*dev), GFP_KERNEL);
if (!dev)
return -ENOMEM;
global_dev = dev;
dev->dev = &pdev->dev;
platform_set_drvdata(pdev, dev);
r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!r) {
dev_err(&pdev->dev, "failed to get IORESOURCE_MEM\n");
return -ENXIO;
}
dev->dcp_regs_base = devm_ioremap(&pdev->dev, r->start,
resource_size(r));
dcp_set(dev, DCP_CTRL_SFRST, DCP_REG_CTRL);
udelay(10);
dcp_clear(dev, DCP_CTRL_SFRST | DCP_CTRL_CLKGATE, DCP_REG_CTRL);
dcp_write(dev, DCP_CTRL_GATHER_RES_WRITE |
DCP_CTRL_ENABLE_CONTEXT_CACHE | DCP_CTRL_CH_IRQ_E_1,
DCP_REG_CTRL);
dcp_write(dev, DCP_CHAN_CTRL_ENABLE_1, DCP_REG_CHAN_CTRL);
for (i = 0; i < 4; i++)
dcp_clear(dev, -1, dcp_chan_reg(DCP_REG_CHAN_STAT, i));
dcp_clear(dev, -1, DCP_REG_STAT);
r = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
if (!r) {
dev_err(&pdev->dev, "can't get IRQ resource (0)\n");
return -EIO;
}
dev->dcp_vmi_irq = r->start;
ret = request_irq(dev->dcp_vmi_irq, dcp_vmi_irq, 0, "dcp", dev);
if (ret != 0) {
dev_err(&pdev->dev, "can't request_irq (0)\n");
return -EIO;
}
r = platform_get_resource(pdev, IORESOURCE_IRQ, 1);
if (!r) {
dev_err(&pdev->dev, "can't get IRQ resource (1)\n");
ret = -EIO;
goto err_free_irq0;
}
dev->dcp_irq = r->start;
ret = request_irq(dev->dcp_irq, dcp_irq, 0, "dcp", dev);
if (ret != 0) {
dev_err(&pdev->dev, "can't request_irq (1)\n");
ret = -EIO;
goto err_free_irq0;
}
dev->hw_pkg[0] = dma_alloc_coherent(&pdev->dev,
DCP_MAX_PKG * sizeof(struct dcp_hw_packet),
&dev->hw_phys_pkg,
GFP_KERNEL);
if (!dev->hw_pkg[0]) {
dev_err(&pdev->dev, "Could not allocate hw descriptors\n");
ret = -ENOMEM;
goto err_free_irq1;
}
for (i = 1; i < DCP_MAX_PKG; i++) {
dev->hw_pkg[i - 1]->next = dev->hw_phys_pkg
+ i * sizeof(struct dcp_hw_packet);
dev->hw_pkg[i] = dev->hw_pkg[i - 1] + 1;
}
dev->hw_pkg[i - 1]->next = dev->hw_phys_pkg;
dev->payload_base = dma_alloc_coherent(&pdev->dev, 2 * AES_KEYSIZE_128,
&dev->payload_base_dma, GFP_KERNEL);
if (!dev->payload_base) {
dev_err(&pdev->dev, "Could not allocate memory for key\n");
ret = -ENOMEM;
goto err_free_hw_packet;
}
tasklet_init(&dev->queue_task, dcp_queue_task,
(unsigned long) dev);
tasklet_init(&dev->done_task, dcp_done_task,
(unsigned long) dev);
spin_lock_init(&dev->queue_lock);
crypto_init_queue(&dev->queue, 10);
init_timer(&dev->watchdog);
dev->watchdog.function = &dcp_watchdog;
dev->watchdog.data = (unsigned long)dev;
dev->dcp_bootstream_misc.minor = MISC_DYNAMIC_MINOR,
dev->dcp_bootstream_misc.name = "dcpboot",
dev->dcp_bootstream_misc.fops = &dcp_bootstream_fops,
ret = misc_register(&dev->dcp_bootstream_misc);
if (ret != 0) {
dev_err(dev->dev, "Unable to register misc device\n");
goto err_free_key_iv;
}
for (i = 0; i < ARRAY_SIZE(algs); i++) {
algs[i].cra_priority = 300;
algs[i].cra_ctxsize = sizeof(struct dcp_op);
algs[i].cra_module = THIS_MODULE;
algs[i].cra_init = dcp_cra_init;
algs[i].cra_exit = dcp_cra_exit;
if (crypto_register_alg(&algs[i])) {
dev_err(&pdev->dev, "register algorithm failed\n");
ret = -ENOMEM;
goto err_unregister;
}
}
dev_notice(&pdev->dev, "DCP crypto enabled.!\n");
return 0;
err_unregister:
for (j = 0; j < i; j++)
crypto_unregister_alg(&algs[j]);
err_free_key_iv:
dma_free_coherent(&pdev->dev, 2 * AES_KEYSIZE_128, dev->payload_base,
dev->payload_base_dma);
err_free_hw_packet:
dma_free_coherent(&pdev->dev, DCP_MAX_PKG *
sizeof(struct dcp_hw_packet), dev->hw_pkg[0],
dev->hw_phys_pkg);
err_free_irq1:
free_irq(dev->dcp_irq, dev);
err_free_irq0:
free_irq(dev->dcp_vmi_irq, dev);
return ret;
}
static int dcp_remove(struct platform_device *pdev)
{
struct dcp_dev *dev;
int j;
dev = platform_get_drvdata(pdev);
dma_free_coherent(&pdev->dev,
DCP_MAX_PKG * sizeof(struct dcp_hw_packet),
dev->hw_pkg[0], dev->hw_phys_pkg);
dma_free_coherent(&pdev->dev, 2 * AES_KEYSIZE_128, dev->payload_base,
dev->payload_base_dma);
free_irq(dev->dcp_irq, dev);
free_irq(dev->dcp_vmi_irq, dev);
tasklet_kill(&dev->done_task);
tasklet_kill(&dev->queue_task);
for (j = 0; j < ARRAY_SIZE(algs); j++)
crypto_unregister_alg(&algs[j]);
misc_deregister(&dev->dcp_bootstream_misc);
return 0;
}
static struct of_device_id fs_dcp_of_match[] = {
{ .compatible = "fsl-dcp"},
{},
};
static struct platform_driver fs_dcp_driver = {
.probe = dcp_probe,
.remove = dcp_remove,
.driver = {
.name = "fsl-dcp",
.owner = THIS_MODULE,
.of_match_table = fs_dcp_of_match
}
};
module_platform_driver(fs_dcp_driver);
MODULE_AUTHOR("Tobias Rauter <tobias.rauter@gmail.com>");
MODULE_DESCRIPTION("Freescale DCP Crypto Driver");
MODULE_LICENSE("GPL");

View File

@ -2676,7 +2676,7 @@ err_out_stop_device:
hifn_reset_dma(dev, 1);
hifn_stop_device(dev);
err_out_free_irq:
free_irq(dev->irq, dev->name);
free_irq(dev->irq, dev);
tasklet_kill(&dev->tasklet);
err_out_free_desc:
pci_free_consistent(pdev, sizeof(struct hifn_dma),
@ -2711,7 +2711,7 @@ static void hifn_remove(struct pci_dev *pdev)
hifn_reset_dma(dev, 1);
hifn_stop_device(dev);
free_irq(dev->irq, dev->name);
free_irq(dev->irq, dev);
tasklet_kill(&dev->tasklet);
hifn_flush(dev);

View File

@ -1146,7 +1146,6 @@ err_unmap_reg:
err:
kfree(cp);
cpg = NULL;
platform_set_drvdata(pdev, NULL);
return ret;
}

View File

@ -203,13 +203,6 @@ static void omap_aes_write_n(struct omap_aes_dev *dd, u32 offset,
static int omap_aes_hw_init(struct omap_aes_dev *dd)
{
/*
* clocks are enabled when request starts and disabled when finished.
* It may be long delays between requests.
* Device might go to off mode to save power.
*/
pm_runtime_get_sync(dd->dev);
if (!(dd->flags & FLAGS_INIT)) {
dd->flags |= FLAGS_INIT;
dd->err = 0;
@ -636,7 +629,6 @@ static void omap_aes_finish_req(struct omap_aes_dev *dd, int err)
pr_debug("err: %d\n", err);
pm_runtime_put(dd->dev);
dd->flags &= ~FLAGS_BUSY;
req->base.complete(&req->base, err);
@ -837,8 +829,16 @@ static int omap_aes_ctr_decrypt(struct ablkcipher_request *req)
static int omap_aes_cra_init(struct crypto_tfm *tfm)
{
pr_debug("enter\n");
struct omap_aes_dev *dd = NULL;
/* Find AES device, currently picks the first device */
spin_lock_bh(&list_lock);
list_for_each_entry(dd, &dev_list, list) {
break;
}
spin_unlock_bh(&list_lock);
pm_runtime_get_sync(dd->dev);
tfm->crt_ablkcipher.reqsize = sizeof(struct omap_aes_reqctx);
return 0;
@ -846,7 +846,16 @@ static int omap_aes_cra_init(struct crypto_tfm *tfm)
static void omap_aes_cra_exit(struct crypto_tfm *tfm)
{
pr_debug("enter\n");
struct omap_aes_dev *dd = NULL;
/* Find AES device, currently picks the first device */
spin_lock_bh(&list_lock);
list_for_each_entry(dd, &dev_list, list) {
break;
}
spin_unlock_bh(&list_lock);
pm_runtime_put_sync(dd->dev);
}
/* ********************** ALGS ************************************ */
@ -1125,10 +1134,9 @@ static int omap_aes_probe(struct platform_device *pdev)
if (err)
goto err_res;
dd->io_base = devm_request_and_ioremap(dev, &res);
if (!dd->io_base) {
dev_err(dev, "can't ioremap\n");
err = -ENOMEM;
dd->io_base = devm_ioremap_resource(dev, &res);
if (IS_ERR(dd->io_base)) {
err = PTR_ERR(dd->io_base);
goto err_res;
}
dd->phys_base = res.start;

View File

@ -1686,10 +1686,9 @@ static int omap_sham_probe(struct platform_device *pdev)
if (err)
goto res_err;
dd->io_base = devm_request_and_ioremap(dev, &res);
if (!dd->io_base) {
dev_err(dev, "can't ioremap\n");
err = -ENOMEM;
dd->io_base = devm_ioremap_resource(dev, &res);
if (IS_ERR(dd->io_base)) {
err = PTR_ERR(dd->io_base);
goto res_err;
}
dd->phys_base = res.start;

View File

@ -1298,7 +1298,7 @@ static ssize_t spacc_stat_irq_thresh_store(struct device *dev,
struct spacc_engine *engine = spacc_dev_to_engine(dev);
unsigned long thresh;
if (strict_strtoul(buf, 0, &thresh))
if (kstrtoul(buf, 0, &thresh))
return -EINVAL;
thresh = clamp(thresh, 1UL, engine->fifo_sz - 1);

View File

@ -647,7 +647,6 @@ static int s5p_aes_probe(struct platform_device *pdev)
clk_disable(pdata->clk);
s5p_dev = NULL;
platform_set_drvdata(pdev, NULL);
return err;
}
@ -668,7 +667,6 @@ static int s5p_aes_remove(struct platform_device *pdev)
clk_disable(pdata->clk);
s5p_dev = NULL;
platform_set_drvdata(pdev, NULL);
return 0;
}

View File

@ -1629,7 +1629,7 @@ static int ux500_cryp_remove(struct platform_device *pdev)
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (res)
release_mem_region(res->start, res->end - res->start + 1);
release_mem_region(res->start, resource_size(res));
kfree(device_data);

View File

@ -3,6 +3,10 @@
#include <linux/types.h>
#define CRC_T10DIF_DIGEST_SIZE 2
#define CRC_T10DIF_BLOCK_SIZE 1
__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len);
__u16 crc_t10dif(unsigned char const *, size_t);
#endif

View File

@ -66,6 +66,8 @@ config CRC16
config CRC_T10DIF
tristate "CRC calculation for the T10 Data Integrity Field"
select CRYPTO
select CRYPTO_CRCT10DIF
help
This option is only needed if a module that's not in the
kernel tree needs to calculate CRC checks for use with the

View File

@ -11,57 +11,44 @@
#include <linux/types.h>
#include <linux/module.h>
#include <linux/crc-t10dif.h>
#include <linux/err.h>
#include <linux/init.h>
#include <crypto/hash.h>
/* Table generated using the following polynomium:
* x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1
* gt: 0x8bb7
*/
static const __u16 t10_dif_crc_table[256] = {
0x0000, 0x8BB7, 0x9CD9, 0x176E, 0xB205, 0x39B2, 0x2EDC, 0xA56B,
0xEFBD, 0x640A, 0x7364, 0xF8D3, 0x5DB8, 0xD60F, 0xC161, 0x4AD6,
0x54CD, 0xDF7A, 0xC814, 0x43A3, 0xE6C8, 0x6D7F, 0x7A11, 0xF1A6,
0xBB70, 0x30C7, 0x27A9, 0xAC1E, 0x0975, 0x82C2, 0x95AC, 0x1E1B,
0xA99A, 0x222D, 0x3543, 0xBEF4, 0x1B9F, 0x9028, 0x8746, 0x0CF1,
0x4627, 0xCD90, 0xDAFE, 0x5149, 0xF422, 0x7F95, 0x68FB, 0xE34C,
0xFD57, 0x76E0, 0x618E, 0xEA39, 0x4F52, 0xC4E5, 0xD38B, 0x583C,
0x12EA, 0x995D, 0x8E33, 0x0584, 0xA0EF, 0x2B58, 0x3C36, 0xB781,
0xD883, 0x5334, 0x445A, 0xCFED, 0x6A86, 0xE131, 0xF65F, 0x7DE8,
0x373E, 0xBC89, 0xABE7, 0x2050, 0x853B, 0x0E8C, 0x19E2, 0x9255,
0x8C4E, 0x07F9, 0x1097, 0x9B20, 0x3E4B, 0xB5FC, 0xA292, 0x2925,
0x63F3, 0xE844, 0xFF2A, 0x749D, 0xD1F6, 0x5A41, 0x4D2F, 0xC698,
0x7119, 0xFAAE, 0xEDC0, 0x6677, 0xC31C, 0x48AB, 0x5FC5, 0xD472,
0x9EA4, 0x1513, 0x027D, 0x89CA, 0x2CA1, 0xA716, 0xB078, 0x3BCF,
0x25D4, 0xAE63, 0xB90D, 0x32BA, 0x97D1, 0x1C66, 0x0B08, 0x80BF,
0xCA69, 0x41DE, 0x56B0, 0xDD07, 0x786C, 0xF3DB, 0xE4B5, 0x6F02,
0x3AB1, 0xB106, 0xA668, 0x2DDF, 0x88B4, 0x0303, 0x146D, 0x9FDA,
0xD50C, 0x5EBB, 0x49D5, 0xC262, 0x6709, 0xECBE, 0xFBD0, 0x7067,
0x6E7C, 0xE5CB, 0xF2A5, 0x7912, 0xDC79, 0x57CE, 0x40A0, 0xCB17,
0x81C1, 0x0A76, 0x1D18, 0x96AF, 0x33C4, 0xB873, 0xAF1D, 0x24AA,
0x932B, 0x189C, 0x0FF2, 0x8445, 0x212E, 0xAA99, 0xBDF7, 0x3640,
0x7C96, 0xF721, 0xE04F, 0x6BF8, 0xCE93, 0x4524, 0x524A, 0xD9FD,
0xC7E6, 0x4C51, 0x5B3F, 0xD088, 0x75E3, 0xFE54, 0xE93A, 0x628D,
0x285B, 0xA3EC, 0xB482, 0x3F35, 0x9A5E, 0x11E9, 0x0687, 0x8D30,
0xE232, 0x6985, 0x7EEB, 0xF55C, 0x5037, 0xDB80, 0xCCEE, 0x4759,
0x0D8F, 0x8638, 0x9156, 0x1AE1, 0xBF8A, 0x343D, 0x2353, 0xA8E4,
0xB6FF, 0x3D48, 0x2A26, 0xA191, 0x04FA, 0x8F4D, 0x9823, 0x1394,
0x5942, 0xD2F5, 0xC59B, 0x4E2C, 0xEB47, 0x60F0, 0x779E, 0xFC29,
0x4BA8, 0xC01F, 0xD771, 0x5CC6, 0xF9AD, 0x721A, 0x6574, 0xEEC3,
0xA415, 0x2FA2, 0x38CC, 0xB37B, 0x1610, 0x9DA7, 0x8AC9, 0x017E,
0x1F65, 0x94D2, 0x83BC, 0x080B, 0xAD60, 0x26D7, 0x31B9, 0xBA0E,
0xF0D8, 0x7B6F, 0x6C01, 0xE7B6, 0x42DD, 0xC96A, 0xDE04, 0x55B3
};
static struct crypto_shash *crct10dif_tfm;
__u16 crc_t10dif(const unsigned char *buffer, size_t len)
{
__u16 crc = 0;
unsigned int i;
struct {
struct shash_desc shash;
char ctx[2];
} desc;
int err;
for (i = 0 ; i < len ; i++)
crc = (crc << 8) ^ t10_dif_crc_table[((crc >> 8) ^ buffer[i]) & 0xff];
desc.shash.tfm = crct10dif_tfm;
desc.shash.flags = 0;
*(__u16 *)desc.ctx = 0;
return crc;
err = crypto_shash_update(&desc.shash, buffer, len);
BUG_ON(err);
return *(__u16 *)desc.ctx;
}
EXPORT_SYMBOL(crc_t10dif);
static int __init crc_t10dif_mod_init(void)
{
crct10dif_tfm = crypto_alloc_shash("crct10dif", 0, 0);
return PTR_RET(crct10dif_tfm);
}
static void __exit crc_t10dif_mod_fini(void)
{
crypto_free_shash(crct10dif_tfm);
}
module_init(crc_t10dif_mod_init);
module_exit(crc_t10dif_mod_fini);
MODULE_DESCRIPTION("T10 DIF CRC calculation");
MODULE_LICENSE("GPL");