From 2021abaa00da64a4b98948c93bf31a55386cd2d0 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Tue, 14 Jan 2014 18:31:01 +0100 Subject: [PATCH 01/60] crypto: dcp - Move the AES operation type from actx to rctx Move the AES operation type and mode from async crypto context to crypto request context. This allows for recycling of the async crypto context for different kinds of operations. I found this problem when I used dm-crypt, which uses the same async crypto context (actx) for both encryption and decryption requests. Since the requests are enqueued into the processing queue, immediatelly storing the type of operation into async crypto context (actx) caused corruption of this information when encryption and decryption operations followed imediatelly one after the other. When the first operation was dequeued, the second operation was already enqueued and overwritten the type of operation in actx, thus causing incorrect result of the first operation. Fix this problem by storing the type of operation into the crypto request context. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Fabio Estevam Cc: Shawn Guo Cc: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/mxs-dcp.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index a6db7fa6f891..56bde65ddadf 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -83,13 +83,16 @@ struct dcp_async_ctx { unsigned int hot:1; /* Crypto-specific context */ - unsigned int enc:1; - unsigned int ecb:1; struct crypto_ablkcipher *fallback; unsigned int key_len; uint8_t key[AES_KEYSIZE_128]; }; +struct dcp_aes_req_ctx { + unsigned int enc:1; + unsigned int ecb:1; +}; + struct dcp_sha_req_ctx { unsigned int init:1; unsigned int fini:1; @@ -190,10 +193,12 @@ static int mxs_dcp_start_dma(struct dcp_async_ctx *actx) /* * Encryption (AES128) */ -static int mxs_dcp_run_aes(struct dcp_async_ctx *actx, int init) +static int mxs_dcp_run_aes(struct dcp_async_ctx *actx, + struct ablkcipher_request *req, int init) { struct dcp *sdcp = global_sdcp; struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan]; + struct dcp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); int ret; dma_addr_t key_phys = dma_map_single(sdcp->dev, sdcp->coh->aes_key, @@ -212,14 +217,14 @@ static int mxs_dcp_run_aes(struct dcp_async_ctx *actx, int init) /* Payload contains the key. */ desc->control0 |= MXS_DCP_CONTROL0_PAYLOAD_KEY; - if (actx->enc) + if (rctx->enc) desc->control0 |= MXS_DCP_CONTROL0_CIPHER_ENCRYPT; if (init) desc->control0 |= MXS_DCP_CONTROL0_CIPHER_INIT; desc->control1 = MXS_DCP_CONTROL1_CIPHER_SELECT_AES128; - if (actx->ecb) + if (rctx->ecb) desc->control1 |= MXS_DCP_CONTROL1_CIPHER_MODE_ECB; else desc->control1 |= MXS_DCP_CONTROL1_CIPHER_MODE_CBC; @@ -247,6 +252,7 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) struct ablkcipher_request *req = ablkcipher_request_cast(arq); struct dcp_async_ctx *actx = crypto_tfm_ctx(arq->tfm); + struct dcp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); struct scatterlist *dst = req->dst; struct scatterlist *src = req->src; @@ -271,7 +277,7 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) /* Copy the key from the temporary location. */ memcpy(key, actx->key, actx->key_len); - if (!actx->ecb) { + if (!rctx->ecb) { /* Copy the CBC IV just past the key. */ memcpy(key + AES_KEYSIZE_128, req->info, AES_KEYSIZE_128); /* CBC needs the INIT set. */ @@ -300,7 +306,7 @@ static int mxs_dcp_aes_block_crypt(struct crypto_async_request *arq) * submit the buffer. */ if (actx->fill == out_off || sg_is_last(src)) { - ret = mxs_dcp_run_aes(actx, init); + ret = mxs_dcp_run_aes(actx, req, init); if (ret) return ret; init = 0; @@ -391,13 +397,14 @@ static int mxs_dcp_aes_enqueue(struct ablkcipher_request *req, int enc, int ecb) struct dcp *sdcp = global_sdcp; struct crypto_async_request *arq = &req->base; struct dcp_async_ctx *actx = crypto_tfm_ctx(arq->tfm); + struct dcp_aes_req_ctx *rctx = ablkcipher_request_ctx(req); int ret; if (unlikely(actx->key_len != AES_KEYSIZE_128)) return mxs_dcp_block_fallback(req, enc); - actx->enc = enc; - actx->ecb = ecb; + rctx->enc = enc; + rctx->ecb = ecb; actx->chan = DCP_CHAN_CRYPTO; mutex_lock(&sdcp->mutex[actx->chan]); @@ -484,7 +491,7 @@ static int mxs_dcp_aes_fallback_init(struct crypto_tfm *tfm) return PTR_ERR(blk); actx->fallback = blk; - tfm->crt_ablkcipher.reqsize = sizeof(struct dcp_async_ctx); + tfm->crt_ablkcipher.reqsize = sizeof(struct dcp_aes_req_ctx); return 0; } From 0a63b09dd6e321d4488347d6283685d08ee4bb7f Mon Sep 17 00:00:00 2001 From: Nitesh Lal Date: Sun, 9 Feb 2014 09:59:13 +0800 Subject: [PATCH 02/60] crypto: caam - Fix job ring discovery in controller driver The SEC Controller driver creates platform devices for it's child job ring nodes. Currently the driver uses for_each_compatible routine which traverses the whole device tree to create the job rings for the platform device. The patch changes this to search for the compatible property of job ring only in the child nodes i.e., the job rings are created as per the number of children associated with the crypto node. Signed-off-by: Nitesh Lal Reviewed-by: Horia Geanta Signed-off-by: Herbert Xu --- drivers/crypto/caam/ctrl.c | 25 ++++++------------------- 1 file changed, 6 insertions(+), 19 deletions(-) diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 63fb1af2c431..30f434fd5dec 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -443,13 +443,10 @@ static int caam_probe(struct platform_device *pdev) * for all, then go probe each one. */ rspec = 0; - for_each_compatible_node(np, NULL, "fsl,sec-v4.0-job-ring") - rspec++; - if (!rspec) { - /* for backward compatible with device trees */ - for_each_compatible_node(np, NULL, "fsl,sec4.0-job-ring") + for_each_available_child_of_node(nprop, np) + if (of_device_is_compatible(np, "fsl,sec-v4.0-job-ring") || + of_device_is_compatible(np, "fsl,sec4.0-job-ring")) rspec++; - } ctrlpriv->jrpdev = kzalloc(sizeof(struct platform_device *) * rspec, GFP_KERNEL); @@ -460,18 +457,9 @@ static int caam_probe(struct platform_device *pdev) ring = 0; ctrlpriv->total_jobrs = 0; - for_each_compatible_node(np, NULL, "fsl,sec-v4.0-job-ring") { - ctrlpriv->jrpdev[ring] = - of_platform_device_create(np, NULL, dev); - if (!ctrlpriv->jrpdev[ring]) { - pr_warn("JR%d Platform device creation error\n", ring); - continue; - } - ctrlpriv->total_jobrs++; - ring++; - } - if (!ring) { - for_each_compatible_node(np, NULL, "fsl,sec4.0-job-ring") { + for_each_available_child_of_node(nprop, np) + if (of_device_is_compatible(np, "fsl,sec-v4.0-job-ring") || + of_device_is_compatible(np, "fsl,sec4.0-job-ring")) { ctrlpriv->jrpdev[ring] = of_platform_device_create(np, NULL, dev); if (!ctrlpriv->jrpdev[ring]) { @@ -482,7 +470,6 @@ static int caam_probe(struct platform_device *pdev) ctrlpriv->total_jobrs++; ring++; } - } /* Check to see if QI present. If so, enable */ ctrlpriv->qi_present = !!(rd_reg64(&topregs->ctrl.perfmon.comp_parms) & From 80e84c16e72a0eac30085322b4664b7b6b0dde75 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Sun, 9 Feb 2014 09:59:14 +0800 Subject: [PATCH 03/60] crypto: ccp - Fix ccp_run_passthru_cmd dma variable assignments There are some suspicious looking lines of code in the new ccp driver, including one that assigns a variable to itself, and another that overwrites a previous assignment. This may have been a cut-and-paste error where 'src' was forgotten to be changed to 'dst'. I have no hardware to test this, so this is untested. Signed-off-by: Dave Jones Acked-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-ops.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index 71ed3ade7e12..c266a7b154bb 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c @@ -1666,8 +1666,8 @@ static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, op.dst.type = CCP_MEMTYPE_SYSTEM; op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg); - op.src.u.dma.offset = dst.sg_wa.sg_used; - op.src.u.dma.length = op.src.u.dma.length; + op.dst.u.dma.offset = dst.sg_wa.sg_used; + op.dst.u.dma.length = op.src.u.dma.length; ret = ccp_perform_passthru(&op); if (ret) { From d81ed6534fd988a8a24fb607b459444d4b3d391a Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Fri, 24 Jan 2014 16:17:56 -0600 Subject: [PATCH 04/60] crypto: ccp - Allow for selective disablement of crypto API algorithms Introduce module parameters that allow for disabling of a crypto algorithm by not registering the algorithm with the crypto API. Signed-off-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-crypto-main.c | 37 +++++++++++++++++++--------- 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c index 2636f044789d..b3f22b07b5bd 100644 --- a/drivers/crypto/ccp/ccp-crypto-main.c +++ b/drivers/crypto/ccp/ccp-crypto-main.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include @@ -24,6 +25,14 @@ MODULE_LICENSE("GPL"); MODULE_VERSION("1.0.0"); MODULE_DESCRIPTION("AMD Cryptographic Coprocessor crypto API support"); +static unsigned int aes_disable; +module_param(aes_disable, uint, 0444); +MODULE_PARM_DESC(aes_disable, "Disable use of AES - any non-zero value"); + +static unsigned int sha_disable; +module_param(sha_disable, uint, 0444); +MODULE_PARM_DESC(sha_disable, "Disable use of SHA - any non-zero value"); + /* List heads for the supported algorithms */ static LIST_HEAD(hash_algs); @@ -337,21 +346,25 @@ static int ccp_register_algs(void) { int ret; - ret = ccp_register_aes_algs(&cipher_algs); - if (ret) - return ret; + if (!aes_disable) { + ret = ccp_register_aes_algs(&cipher_algs); + if (ret) + return ret; - ret = ccp_register_aes_cmac_algs(&hash_algs); - if (ret) - return ret; + ret = ccp_register_aes_cmac_algs(&hash_algs); + if (ret) + return ret; - ret = ccp_register_aes_xts_algs(&cipher_algs); - if (ret) - return ret; + ret = ccp_register_aes_xts_algs(&cipher_algs); + if (ret) + return ret; + } - ret = ccp_register_sha_algs(&hash_algs); - if (ret) - return ret; + if (!sha_disable) { + ret = ccp_register_sha_algs(&hash_algs); + if (ret) + return ret; + } return 0; } From c11baa02c5d6ea06362fa61da070af34b7706c83 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Fri, 24 Jan 2014 16:18:02 -0600 Subject: [PATCH 05/60] crypto: ccp - Move HMAC calculation down to ccp ops file Move the support to perform an HMAC calculation into the CCP operations file. This eliminates the need to perform a synchronous SHA operation used to calculate the HMAC. Signed-off-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-crypto-sha.c | 130 ++++++---------------------- drivers/crypto/ccp/ccp-crypto.h | 8 +- drivers/crypto/ccp/ccp-ops.c | 104 +++++++++++++++++++++- include/linux/ccp.h | 7 ++ 4 files changed, 139 insertions(+), 110 deletions(-) diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c index 3867290b3531..873f23425245 100644 --- a/drivers/crypto/ccp/ccp-crypto-sha.c +++ b/drivers/crypto/ccp/ccp-crypto-sha.c @@ -24,75 +24,10 @@ #include "ccp-crypto.h" -struct ccp_sha_result { - struct completion completion; - int err; -}; - -static void ccp_sync_hash_complete(struct crypto_async_request *req, int err) -{ - struct ccp_sha_result *result = req->data; - - if (err == -EINPROGRESS) - return; - - result->err = err; - complete(&result->completion); -} - -static int ccp_sync_hash(struct crypto_ahash *tfm, u8 *buf, - struct scatterlist *sg, unsigned int len) -{ - struct ccp_sha_result result; - struct ahash_request *req; - int ret; - - init_completion(&result.completion); - - req = ahash_request_alloc(tfm, GFP_KERNEL); - if (!req) - return -ENOMEM; - - ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, - ccp_sync_hash_complete, &result); - ahash_request_set_crypt(req, sg, buf, len); - - ret = crypto_ahash_digest(req); - if ((ret == -EINPROGRESS) || (ret == -EBUSY)) { - ret = wait_for_completion_interruptible(&result.completion); - if (!ret) - ret = result.err; - } - - ahash_request_free(req); - - return ret; -} - -static int ccp_sha_finish_hmac(struct crypto_async_request *async_req) -{ - struct ahash_request *req = ahash_request_cast(async_req); - struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ccp_ctx *ctx = crypto_ahash_ctx(tfm); - struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); - struct scatterlist sg[2]; - unsigned int block_size = - crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); - unsigned int digest_size = crypto_ahash_digestsize(tfm); - - sg_init_table(sg, ARRAY_SIZE(sg)); - sg_set_buf(&sg[0], ctx->u.sha.opad, block_size); - sg_set_buf(&sg[1], rctx->ctx, digest_size); - - return ccp_sync_hash(ctx->u.sha.hmac_tfm, req->result, sg, - block_size + digest_size); -} - static int ccp_sha_complete(struct crypto_async_request *async_req, int ret) { struct ahash_request *req = ahash_request_cast(async_req); struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - struct ccp_ctx *ctx = crypto_ahash_ctx(tfm); struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); unsigned int digest_size = crypto_ahash_digestsize(tfm); @@ -112,10 +47,6 @@ static int ccp_sha_complete(struct crypto_async_request *async_req, int ret) if (req->result) memcpy(req->result, rctx->ctx, digest_size); - /* If we're doing an HMAC, we need to perform that on the final op */ - if (rctx->final && ctx->u.sha.key_len) - ret = ccp_sha_finish_hmac(async_req); - e_free: sg_free_table(&rctx->data_sg); @@ -126,6 +57,7 @@ static int ccp_do_sha_update(struct ahash_request *req, unsigned int nbytes, unsigned int final) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); + struct ccp_ctx *ctx = crypto_ahash_ctx(tfm); struct ccp_sha_req_ctx *rctx = ahash_request_ctx(req); struct scatterlist *sg; unsigned int block_size = @@ -196,6 +128,11 @@ static int ccp_do_sha_update(struct ahash_request *req, unsigned int nbytes, rctx->cmd.u.sha.ctx_len = sizeof(rctx->ctx); rctx->cmd.u.sha.src = sg; rctx->cmd.u.sha.src_len = rctx->hash_cnt; + rctx->cmd.u.sha.opad = ctx->u.sha.key_len ? + &ctx->u.sha.opad_sg : NULL; + rctx->cmd.u.sha.opad_len = ctx->u.sha.key_len ? + ctx->u.sha.opad_count : 0; + rctx->cmd.u.sha.first = rctx->first; rctx->cmd.u.sha.final = rctx->final; rctx->cmd.u.sha.msg_bits = rctx->msg_bits; @@ -218,7 +155,6 @@ static int ccp_sha_init(struct ahash_request *req) memset(rctx, 0, sizeof(*rctx)); - memcpy(rctx->ctx, alg->init, sizeof(rctx->ctx)); rctx->type = alg->type; rctx->first = 1; @@ -261,10 +197,13 @@ static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key, unsigned int key_len) { struct ccp_ctx *ctx = crypto_tfm_ctx(crypto_ahash_tfm(tfm)); - struct scatterlist sg; - unsigned int block_size = - crypto_tfm_alg_blocksize(crypto_ahash_tfm(tfm)); - unsigned int digest_size = crypto_ahash_digestsize(tfm); + struct crypto_shash *shash = ctx->u.sha.hmac_tfm; + struct { + struct shash_desc sdesc; + char ctx[crypto_shash_descsize(shash)]; + } desc; + unsigned int block_size = crypto_shash_blocksize(shash); + unsigned int digest_size = crypto_shash_digestsize(shash); int i, ret; /* Set to zero until complete */ @@ -277,8 +216,12 @@ static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key, if (key_len > block_size) { /* Must hash the input key */ - sg_init_one(&sg, key, key_len); - ret = ccp_sync_hash(tfm, ctx->u.sha.key, &sg, key_len); + desc.sdesc.tfm = shash; + desc.sdesc.flags = crypto_ahash_get_flags(tfm) & + CRYPTO_TFM_REQ_MAY_SLEEP; + + ret = crypto_shash_digest(&desc.sdesc, key, key_len, + ctx->u.sha.key); if (ret) { crypto_ahash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; @@ -293,6 +236,9 @@ static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key, ctx->u.sha.opad[i] = ctx->u.sha.key[i] ^ 0x5c; } + sg_init_one(&ctx->u.sha.opad_sg, ctx->u.sha.opad, block_size); + ctx->u.sha.opad_count = block_size; + ctx->u.sha.key_len = key_len; return 0; @@ -319,10 +265,9 @@ static int ccp_hmac_sha_cra_init(struct crypto_tfm *tfm) { struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); struct ccp_crypto_ahash_alg *alg = ccp_crypto_ahash_alg(tfm); - struct crypto_ahash *hmac_tfm; + struct crypto_shash *hmac_tfm; - hmac_tfm = crypto_alloc_ahash(alg->child_alg, - CRYPTO_ALG_TYPE_AHASH, 0); + hmac_tfm = crypto_alloc_shash(alg->child_alg, 0, 0); if (IS_ERR(hmac_tfm)) { pr_warn("could not load driver %s need for HMAC support\n", alg->child_alg); @@ -339,35 +284,14 @@ static void ccp_hmac_sha_cra_exit(struct crypto_tfm *tfm) struct ccp_ctx *ctx = crypto_tfm_ctx(tfm); if (ctx->u.sha.hmac_tfm) - crypto_free_ahash(ctx->u.sha.hmac_tfm); + crypto_free_shash(ctx->u.sha.hmac_tfm); ccp_sha_cra_exit(tfm); } -static const __be32 sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = { - cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1), - cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3), - cpu_to_be32(SHA1_H4), 0, 0, 0, -}; - -static const __be32 sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = { - cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1), - cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3), - cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5), - cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7), -}; - -static const __be32 sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = { - cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1), - cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3), - cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5), - cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7), -}; - struct ccp_sha_def { const char *name; const char *drv_name; - const __be32 *init; enum ccp_sha_type type; u32 digest_size; u32 block_size; @@ -377,7 +301,6 @@ static struct ccp_sha_def sha_algs[] = { { .name = "sha1", .drv_name = "sha1-ccp", - .init = sha1_init, .type = CCP_SHA_TYPE_1, .digest_size = SHA1_DIGEST_SIZE, .block_size = SHA1_BLOCK_SIZE, @@ -385,7 +308,6 @@ static struct ccp_sha_def sha_algs[] = { { .name = "sha224", .drv_name = "sha224-ccp", - .init = sha224_init, .type = CCP_SHA_TYPE_224, .digest_size = SHA224_DIGEST_SIZE, .block_size = SHA224_BLOCK_SIZE, @@ -393,7 +315,6 @@ static struct ccp_sha_def sha_algs[] = { { .name = "sha256", .drv_name = "sha256-ccp", - .init = sha256_init, .type = CCP_SHA_TYPE_256, .digest_size = SHA256_DIGEST_SIZE, .block_size = SHA256_BLOCK_SIZE, @@ -460,7 +381,6 @@ static int ccp_register_sha_alg(struct list_head *head, INIT_LIST_HEAD(&ccp_alg->entry); - ccp_alg->init = def->init; ccp_alg->type = def->type; alg = &ccp_alg->alg; diff --git a/drivers/crypto/ccp/ccp-crypto.h b/drivers/crypto/ccp/ccp-crypto.h index b222231b6169..9aa4ae184f7f 100644 --- a/drivers/crypto/ccp/ccp-crypto.h +++ b/drivers/crypto/ccp/ccp-crypto.h @@ -137,11 +137,14 @@ struct ccp_aes_cmac_req_ctx { #define MAX_SHA_BLOCK_SIZE SHA256_BLOCK_SIZE struct ccp_sha_ctx { + struct scatterlist opad_sg; + unsigned int opad_count; + unsigned int key_len; u8 key[MAX_SHA_BLOCK_SIZE]; u8 ipad[MAX_SHA_BLOCK_SIZE]; u8 opad[MAX_SHA_BLOCK_SIZE]; - struct crypto_ahash *hmac_tfm; + struct crypto_shash *hmac_tfm; }; struct ccp_sha_req_ctx { @@ -167,9 +170,6 @@ struct ccp_sha_req_ctx { unsigned int buf_count; u8 buf[MAX_SHA_BLOCK_SIZE]; - /* HMAC support field */ - struct scatterlist pad_sg; - /* CCP driver command */ struct ccp_cmd cmd; }; diff --git a/drivers/crypto/ccp/ccp-ops.c b/drivers/crypto/ccp/ccp-ops.c index c266a7b154bb..9ae006d69df4 100644 --- a/drivers/crypto/ccp/ccp-ops.c +++ b/drivers/crypto/ccp/ccp-ops.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "ccp-dev.h" @@ -132,6 +133,27 @@ struct ccp_op { } u; }; +/* SHA initial context values */ +static const __be32 ccp_sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = { + cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1), + cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3), + cpu_to_be32(SHA1_H4), 0, 0, 0, +}; + +static const __be32 ccp_sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = { + cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1), + cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3), + cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5), + cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7), +}; + +static const __be32 ccp_sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = { + cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1), + cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3), + cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5), + cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7), +}; + /* The CCP cannot perform zero-length sha operations so the caller * is required to buffer data for the final operation. However, a * sha operation for a message with a total length of zero is valid @@ -1411,7 +1433,27 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) if (ret) return ret; - ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len); + if (sha->first) { + const __be32 *init; + + switch (sha->type) { + case CCP_SHA_TYPE_1: + init = ccp_sha1_init; + break; + case CCP_SHA_TYPE_224: + init = ccp_sha224_init; + break; + case CCP_SHA_TYPE_256: + init = ccp_sha256_init; + break; + default: + ret = -EINVAL; + goto e_ctx; + } + memcpy(ctx.address, init, CCP_SHA_CTXSIZE); + } else + ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len); + ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx, CCP_PASSTHRU_BYTESWAP_256BIT); if (ret) { @@ -1451,6 +1493,66 @@ static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd) ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len); + if (sha->final && sha->opad) { + /* HMAC operation, recursively perform final SHA */ + struct ccp_cmd hmac_cmd; + struct scatterlist sg; + u64 block_size, digest_size; + u8 *hmac_buf; + + switch (sha->type) { + case CCP_SHA_TYPE_1: + block_size = SHA1_BLOCK_SIZE; + digest_size = SHA1_DIGEST_SIZE; + break; + case CCP_SHA_TYPE_224: + block_size = SHA224_BLOCK_SIZE; + digest_size = SHA224_DIGEST_SIZE; + break; + case CCP_SHA_TYPE_256: + block_size = SHA256_BLOCK_SIZE; + digest_size = SHA256_DIGEST_SIZE; + break; + default: + ret = -EINVAL; + goto e_data; + } + + if (sha->opad_len != block_size) { + ret = -EINVAL; + goto e_data; + } + + hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL); + if (!hmac_buf) { + ret = -ENOMEM; + goto e_data; + } + sg_init_one(&sg, hmac_buf, block_size + digest_size); + + scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0); + memcpy(hmac_buf + block_size, ctx.address, digest_size); + + memset(&hmac_cmd, 0, sizeof(hmac_cmd)); + hmac_cmd.engine = CCP_ENGINE_SHA; + hmac_cmd.u.sha.type = sha->type; + hmac_cmd.u.sha.ctx = sha->ctx; + hmac_cmd.u.sha.ctx_len = sha->ctx_len; + hmac_cmd.u.sha.src = &sg; + hmac_cmd.u.sha.src_len = block_size + digest_size; + hmac_cmd.u.sha.opad = NULL; + hmac_cmd.u.sha.opad_len = 0; + hmac_cmd.u.sha.first = 1; + hmac_cmd.u.sha.final = 1; + hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3; + + ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd); + if (ret) + cmd->engine_error = hmac_cmd.engine_error; + + kfree(hmac_buf); + } + e_data: ccp_free_data(&src, cmd_q); diff --git a/include/linux/ccp.h b/include/linux/ccp.h index b941ab9f762b..ebcc9d146219 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -232,6 +232,9 @@ enum ccp_sha_type { * @ctx_len: length in bytes of hash value * @src: data to be used for this operation * @src_len: length in bytes of data used for this operation + * @opad: data to be used for final HMAC operation + * @opad_len: length in bytes of data used for final HMAC operation + * @first: indicates first SHA operation * @final: indicates final SHA operation * @msg_bits: total length of the message in bits used in final SHA operation * @@ -251,6 +254,10 @@ struct ccp_sha_engine { struct scatterlist *src; u64 src_len; /* In bytes */ + struct scatterlist *opad; + u32 opad_len; /* In bytes */ + + u32 first; /* Indicates first sha cmd */ u32 final; /* Indicates final sha cmd */ u64 msg_bits; /* Message length in bits required for * final sha cmd */ From bc3854476f36d816d52cd8d41d1ecab2f8b6cdcf Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Fri, 24 Jan 2014 16:18:08 -0600 Subject: [PATCH 06/60] crypto: ccp - Use a single queue for proper ordering of tfm requests Move to a single queue to serialize requests within a tfm. When testing using IPSec with a large number of network connections the per cpu tfm queuing logic was not working properly. Signed-off-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-crypto-main.c | 164 ++++++++------------------- 1 file changed, 48 insertions(+), 116 deletions(-) diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c index b3f22b07b5bd..010fded5d46b 100644 --- a/drivers/crypto/ccp/ccp-crypto-main.c +++ b/drivers/crypto/ccp/ccp-crypto-main.c @@ -38,23 +38,20 @@ MODULE_PARM_DESC(sha_disable, "Disable use of SHA - any non-zero value"); static LIST_HEAD(hash_algs); static LIST_HEAD(cipher_algs); -/* For any tfm, requests for that tfm on the same CPU must be returned - * in the order received. With multiple queues available, the CCP can - * process more than one cmd at a time. Therefore we must maintain - * a cmd list to insure the proper ordering of requests on a given tfm/cpu - * combination. +/* For any tfm, requests for that tfm must be returned on the order + * received. With multiple queues available, the CCP can process more + * than one cmd at a time. Therefore we must maintain a cmd list to insure + * the proper ordering of requests on a given tfm. */ -struct ccp_crypto_cpu_queue { +struct ccp_crypto_queue { struct list_head cmds; struct list_head *backlog; unsigned int cmd_count; }; -#define CCP_CRYPTO_MAX_QLEN 50 +#define CCP_CRYPTO_MAX_QLEN 100 -struct ccp_crypto_percpu_queue { - struct ccp_crypto_cpu_queue __percpu *cpu_queue; -}; -static struct ccp_crypto_percpu_queue req_queue; +static struct ccp_crypto_queue req_queue; +static spinlock_t req_queue_lock; struct ccp_crypto_cmd { struct list_head entry; @@ -71,8 +68,6 @@ struct ccp_crypto_cmd { /* Used for held command processing to determine state */ int ret; - - int cpu; }; struct ccp_crypto_cpu { @@ -91,25 +86,21 @@ static inline bool ccp_crypto_success(int err) return true; } -/* - * ccp_crypto_cmd_complete must be called while running on the appropriate - * cpu and the caller must have done a get_cpu to disable preemption - */ static struct ccp_crypto_cmd *ccp_crypto_cmd_complete( struct ccp_crypto_cmd *crypto_cmd, struct ccp_crypto_cmd **backlog) { - struct ccp_crypto_cpu_queue *cpu_queue; struct ccp_crypto_cmd *held = NULL, *tmp; + unsigned long flags; *backlog = NULL; - cpu_queue = this_cpu_ptr(req_queue.cpu_queue); + spin_lock_irqsave(&req_queue_lock, flags); /* Held cmds will be after the current cmd in the queue so start * searching for a cmd with a matching tfm for submission. */ tmp = crypto_cmd; - list_for_each_entry_continue(tmp, &cpu_queue->cmds, entry) { + list_for_each_entry_continue(tmp, &req_queue.cmds, entry) { if (crypto_cmd->tfm != tmp->tfm) continue; held = tmp; @@ -120,47 +111,45 @@ static struct ccp_crypto_cmd *ccp_crypto_cmd_complete( * Because cmds can be executed from any point in the cmd list * special precautions have to be taken when handling the backlog. */ - if (cpu_queue->backlog != &cpu_queue->cmds) { + if (req_queue.backlog != &req_queue.cmds) { /* Skip over this cmd if it is the next backlog cmd */ - if (cpu_queue->backlog == &crypto_cmd->entry) - cpu_queue->backlog = crypto_cmd->entry.next; + if (req_queue.backlog == &crypto_cmd->entry) + req_queue.backlog = crypto_cmd->entry.next; - *backlog = container_of(cpu_queue->backlog, + *backlog = container_of(req_queue.backlog, struct ccp_crypto_cmd, entry); - cpu_queue->backlog = cpu_queue->backlog->next; + req_queue.backlog = req_queue.backlog->next; /* Skip over this cmd if it is now the next backlog cmd */ - if (cpu_queue->backlog == &crypto_cmd->entry) - cpu_queue->backlog = crypto_cmd->entry.next; + if (req_queue.backlog == &crypto_cmd->entry) + req_queue.backlog = crypto_cmd->entry.next; } /* Remove the cmd entry from the list of cmds */ - cpu_queue->cmd_count--; + req_queue.cmd_count--; list_del(&crypto_cmd->entry); + spin_unlock_irqrestore(&req_queue_lock, flags); + return held; } -static void ccp_crypto_complete_on_cpu(struct work_struct *work) +static void ccp_crypto_complete(void *data, int err) { - struct ccp_crypto_cpu *cpu_work = - container_of(work, struct ccp_crypto_cpu, work); - struct ccp_crypto_cmd *crypto_cmd = cpu_work->crypto_cmd; + struct ccp_crypto_cmd *crypto_cmd = data; struct ccp_crypto_cmd *held, *next, *backlog; struct crypto_async_request *req = crypto_cmd->req; struct ccp_ctx *ctx = crypto_tfm_ctx(req->tfm); - int cpu, ret; + int ret; - cpu = get_cpu(); - - if (cpu_work->err == -EINPROGRESS) { + if (err == -EINPROGRESS) { /* Only propogate the -EINPROGRESS if necessary */ if (crypto_cmd->ret == -EBUSY) { crypto_cmd->ret = -EINPROGRESS; req->complete(req, -EINPROGRESS); } - goto e_cpu; + return; } /* Operation has completed - update the queue before invoking @@ -178,7 +167,7 @@ static void ccp_crypto_complete_on_cpu(struct work_struct *work) req->complete(req, -EINPROGRESS); /* Completion callbacks */ - ret = cpu_work->err; + ret = err; if (ctx->complete) ret = ctx->complete(req, ret); req->complete(req, ret); @@ -203,52 +192,28 @@ static void ccp_crypto_complete_on_cpu(struct work_struct *work) } kfree(crypto_cmd); - -e_cpu: - put_cpu(); - - complete(&cpu_work->completion); -} - -static void ccp_crypto_complete(void *data, int err) -{ - struct ccp_crypto_cmd *crypto_cmd = data; - struct ccp_crypto_cpu cpu_work; - - INIT_WORK(&cpu_work.work, ccp_crypto_complete_on_cpu); - init_completion(&cpu_work.completion); - cpu_work.crypto_cmd = crypto_cmd; - cpu_work.err = err; - - schedule_work_on(crypto_cmd->cpu, &cpu_work.work); - - /* Keep the completion call synchronous */ - wait_for_completion(&cpu_work.completion); } static int ccp_crypto_enqueue_cmd(struct ccp_crypto_cmd *crypto_cmd) { - struct ccp_crypto_cpu_queue *cpu_queue; struct ccp_crypto_cmd *active = NULL, *tmp; - int cpu, ret; + unsigned long flags; + int ret; - cpu = get_cpu(); - crypto_cmd->cpu = cpu; - - cpu_queue = this_cpu_ptr(req_queue.cpu_queue); + spin_lock_irqsave(&req_queue_lock, flags); /* Check if the cmd can/should be queued */ - if (cpu_queue->cmd_count >= CCP_CRYPTO_MAX_QLEN) { + if (req_queue.cmd_count >= CCP_CRYPTO_MAX_QLEN) { ret = -EBUSY; if (!(crypto_cmd->cmd->flags & CCP_CMD_MAY_BACKLOG)) - goto e_cpu; + goto e_lock; } /* Look for an entry with the same tfm. If there is a cmd - * with the same tfm in the list for this cpu then the current - * cmd cannot be submitted to the CCP yet. + * with the same tfm in the list then the current cmd cannot + * be submitted to the CCP yet. */ - list_for_each_entry(tmp, &cpu_queue->cmds, entry) { + list_for_each_entry(tmp, &req_queue.cmds, entry) { if (crypto_cmd->tfm != tmp->tfm) continue; active = tmp; @@ -259,21 +224,21 @@ static int ccp_crypto_enqueue_cmd(struct ccp_crypto_cmd *crypto_cmd) if (!active) { ret = ccp_enqueue_cmd(crypto_cmd->cmd); if (!ccp_crypto_success(ret)) - goto e_cpu; + goto e_lock; } - if (cpu_queue->cmd_count >= CCP_CRYPTO_MAX_QLEN) { + if (req_queue.cmd_count >= CCP_CRYPTO_MAX_QLEN) { ret = -EBUSY; - if (cpu_queue->backlog == &cpu_queue->cmds) - cpu_queue->backlog = &crypto_cmd->entry; + if (req_queue.backlog == &req_queue.cmds) + req_queue.backlog = &crypto_cmd->entry; } crypto_cmd->ret = ret; - cpu_queue->cmd_count++; - list_add_tail(&crypto_cmd->entry, &cpu_queue->cmds); + req_queue.cmd_count++; + list_add_tail(&crypto_cmd->entry, &req_queue.cmds); -e_cpu: - put_cpu(); +e_lock: + spin_unlock_irqrestore(&req_queue_lock, flags); return ret; } @@ -387,50 +352,18 @@ static void ccp_unregister_algs(void) } } -static int ccp_init_queues(void) -{ - struct ccp_crypto_cpu_queue *cpu_queue; - int cpu; - - req_queue.cpu_queue = alloc_percpu(struct ccp_crypto_cpu_queue); - if (!req_queue.cpu_queue) - return -ENOMEM; - - for_each_possible_cpu(cpu) { - cpu_queue = per_cpu_ptr(req_queue.cpu_queue, cpu); - INIT_LIST_HEAD(&cpu_queue->cmds); - cpu_queue->backlog = &cpu_queue->cmds; - cpu_queue->cmd_count = 0; - } - - return 0; -} - -static void ccp_fini_queue(void) -{ - struct ccp_crypto_cpu_queue *cpu_queue; - int cpu; - - for_each_possible_cpu(cpu) { - cpu_queue = per_cpu_ptr(req_queue.cpu_queue, cpu); - BUG_ON(!list_empty(&cpu_queue->cmds)); - } - free_percpu(req_queue.cpu_queue); -} - static int ccp_crypto_init(void) { int ret; - ret = ccp_init_queues(); - if (ret) - return ret; + spin_lock_init(&req_queue_lock); + INIT_LIST_HEAD(&req_queue.cmds); + req_queue.backlog = &req_queue.cmds; + req_queue.cmd_count = 0; ret = ccp_register_algs(); - if (ret) { + if (ret) ccp_unregister_algs(); - ccp_fini_queue(); - } return ret; } @@ -438,7 +371,6 @@ static int ccp_crypto_init(void) static void ccp_crypto_exit(void) { ccp_unregister_algs(); - ccp_fini_queue(); } module_init(ccp_crypto_init); From 530abd89387b5213000b214be64fadd8ab3176a7 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Fri, 24 Jan 2014 16:18:14 -0600 Subject: [PATCH 07/60] crypto: ccp - Perform completion callbacks using a tasklet Change from scheduling work to scheduling a tasklet to perform the callback operations. Signed-off-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-dev.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c index c3bc21264600..2c7816149b01 100644 --- a/drivers/crypto/ccp/ccp-dev.c +++ b/drivers/crypto/ccp/ccp-dev.c @@ -30,6 +30,11 @@ MODULE_LICENSE("GPL"); MODULE_VERSION("1.0.0"); MODULE_DESCRIPTION("AMD Cryptographic Coprocessor driver"); +struct ccp_tasklet_data { + struct completion completion; + struct ccp_cmd *cmd; +}; + static struct ccp_device *ccp_dev; static inline struct ccp_device *ccp_get_device(void) @@ -192,17 +197,23 @@ static struct ccp_cmd *ccp_dequeue_cmd(struct ccp_cmd_queue *cmd_q) return cmd; } -static void ccp_do_cmd_complete(struct work_struct *work) +static void ccp_do_cmd_complete(unsigned long data) { - struct ccp_cmd *cmd = container_of(work, struct ccp_cmd, work); + struct ccp_tasklet_data *tdata = (struct ccp_tasklet_data *)data; + struct ccp_cmd *cmd = tdata->cmd; cmd->callback(cmd->data, cmd->ret); + complete(&tdata->completion); } static int ccp_cmd_queue_thread(void *data) { struct ccp_cmd_queue *cmd_q = (struct ccp_cmd_queue *)data; struct ccp_cmd *cmd; + struct ccp_tasklet_data tdata; + struct tasklet_struct tasklet; + + tasklet_init(&tasklet, ccp_do_cmd_complete, (unsigned long)&tdata); set_current_state(TASK_INTERRUPTIBLE); while (!kthread_should_stop()) { @@ -220,8 +231,10 @@ static int ccp_cmd_queue_thread(void *data) cmd->ret = ccp_run_cmd(cmd_q, cmd); /* Schedule the completion callback */ - INIT_WORK(&cmd->work, ccp_do_cmd_complete); - schedule_work(&cmd->work); + tdata.cmd = cmd; + init_completion(&tdata.completion); + tasklet_schedule(&tasklet); + wait_for_completion(&tdata.completion); } __set_current_state(TASK_RUNNING); From f3de9cb1ca6ce347393c4789b3a6142f96827f18 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Tue, 28 Jan 2014 20:17:23 +0800 Subject: [PATCH 08/60] crypto: talitos: init the priv->alg_list more earlier in talitos_probe() In function talitos_probe(), it will jump to err_out when getting an error in talitos_probe_irq(). Then the uninitialized list head priv->alg_list will be used in function talitos_remove(). In this case we would get a call trace like the following. So move up the initialization of priv->alg_list. Unable to handle kernel paging request for data at address 0x00000000 Faulting instruction address: 0xc0459ff4 Oops: Kernel access of bad area, sig: 11 [#1] SMP NR_CPUS=8 P1020 RDB Modules linked in: CPU: 1 PID: 1 Comm: swapper/0 Tainted: G W 3.13.0-08789-g54c0a4b46150 #33 task: cf050000 ti: cf04c000 task.ti: cf04c000 NIP: c0459ff4 LR: c0459fd4 CTR: c02f2438 REGS: cf04dcb0 TRAP: 0300 Tainted: G W (3.13.0-08789-g54c0a4b46150) MSR: 00029000 CR: 82000028 XER: 20000000 DEAR: 00000000 ESR: 00000000 GPR00: c045ac28 cf04dd60 cf050000 cf2579c0 00021000 00000000 c02f35b0 0000014e GPR08: c07e702c cf104300 c07e702c 0000014e 22000024 00000000 c0002a3c 00000000 GPR16: 00000000 00000000 00000000 00000000 00000000 00000000 c082e4e0 000000df GPR24: 00000000 00100100 00200200 cf257a2c cf0efe10 cf2579c0 cf0efe10 00000000 NIP [c0459ff4] talitos_remove+0x3c/0x1c8 LR [c0459fd4] talitos_remove+0x1c/0x1c8 Call Trace: [cf04dd60] [c07485d8] __func__.13331+0x1241c8/0x1391c0 (unreliable) [cf04dd90] [c045ac28] talitos_probe+0x244/0x998 [cf04dde0] [c0306a74] platform_drv_probe+0x28/0x68 [cf04ddf0] [c0304d38] really_probe+0x78/0x250 [cf04de10] [c030505c] __driver_attach+0xc8/0xcc [cf04de30] [c0302e98] bus_for_each_dev+0x6c/0xb8 [cf04de60] [c03043cc] bus_add_driver+0x168/0x220 [cf04de80] [c0305798] driver_register+0x88/0x130 [cf04de90] [c0002458] do_one_initcall+0x14c/0x198 [cf04df00] [c079f904] kernel_init_freeable+0x138/0x1d4 [cf04df30] [c0002a50] kernel_init+0x14/0x124 [cf04df40] [c000ec40] ret_from_kernel_thread+0x5c/0x64 Signed-off-by: Kevin Hao Signed-off-by: Herbert Xu --- drivers/crypto/talitos.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/talitos.c b/drivers/crypto/talitos.c index 5967667e1a8f..624b8be0c365 100644 --- a/drivers/crypto/talitos.c +++ b/drivers/crypto/talitos.c @@ -2637,6 +2637,8 @@ static int talitos_probe(struct platform_device *ofdev) if (!priv) return -ENOMEM; + INIT_LIST_HEAD(&priv->alg_list); + dev_set_drvdata(dev, priv); priv->ofdev = ofdev; @@ -2657,8 +2659,6 @@ static int talitos_probe(struct platform_device *ofdev) (unsigned long)dev); } - INIT_LIST_HEAD(&priv->alg_list); - priv->reg = of_iomap(np, 0); if (!priv->reg) { dev_err(dev, "failed to of_iomap\n"); From e921f0307531b27dbe34c17e8a5be5a88010d179 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 28 Jan 2014 22:36:11 -0200 Subject: [PATCH 09/60] crypto: mxs-dcp: Use devm_kzalloc() Using devm_kzalloc() can make the code cleaner. While at it, remove the devm_kzalloc error message as there is standard OOM message done by the core. Signed-off-by: Fabio Estevam Acked-by: Marek Vasut Signed-off-by: Herbert Xu --- drivers/crypto/mxs-dcp.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index 56bde65ddadf..30941d0c5803 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -942,9 +942,8 @@ static int mxs_dcp_probe(struct platform_device *pdev) } /* Allocate coherent helper block. */ - sdcp->coh = kzalloc(sizeof(struct dcp_coherent_block), GFP_KERNEL); + sdcp->coh = devm_kzalloc(dev, sizeof(*sdcp->coh), GFP_KERNEL); if (!sdcp->coh) { - dev_err(dev, "Error allocating coherent block\n"); ret = -ENOMEM; goto err_mutex; } @@ -989,7 +988,7 @@ static int mxs_dcp_probe(struct platform_device *pdev) if (IS_ERR(sdcp->thread[DCP_CHAN_HASH_SHA])) { dev_err(dev, "Error starting SHA thread!\n"); ret = PTR_ERR(sdcp->thread[DCP_CHAN_HASH_SHA]); - goto err_free_coherent; + goto err_mutex; } sdcp->thread[DCP_CHAN_CRYPTO] = kthread_run(dcp_chan_thread_aes, @@ -1047,8 +1046,6 @@ err_destroy_aes_thread: err_destroy_sha_thread: kthread_stop(sdcp->thread[DCP_CHAN_HASH_SHA]); -err_free_coherent: - kfree(sdcp->coh); err_mutex: mutex_unlock(&global_mutex); return ret; @@ -1058,8 +1055,6 @@ static int mxs_dcp_remove(struct platform_device *pdev) { struct dcp *sdcp = platform_get_drvdata(pdev); - kfree(sdcp->coh); - if (sdcp->caps & MXS_DCP_CAPABILITY1_SHA256) crypto_unregister_ahash(&dcp_sha256_alg); From fecfd7f7e91fc1e82d44b0e64a6bda8133f2037b Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 28 Jan 2014 22:36:12 -0200 Subject: [PATCH 10/60] crypto: mxs-dcp: Check the return value of stmp_reset_block() stmp_reset_block() may fail, so check its return value and propagate it in the case of error. Signed-off-by: Fabio Estevam Acked-by: Marek Vasut Signed-off-by: Herbert Xu --- drivers/crypto/mxs-dcp.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index 30941d0c5803..37e070670702 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -949,7 +949,9 @@ static int mxs_dcp_probe(struct platform_device *pdev) } /* Restart the DCP block. */ - stmp_reset_block(sdcp->base); + ret = stmp_reset_block(sdcp->base); + if (ret) + goto err_mutex; /* Initialize control register. */ writel(MXS_DCP_CTRL_GATHER_RESIDUAL_WRITES | From d167b6e1fb8ad386b17485ca88804d14f1695805 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 30 Jan 2014 14:49:43 +0300 Subject: [PATCH 11/60] hwrng: cleanup in hwrng_register() My static checker complains that: drivers/char/hw_random/core.c:341 hwrng_register() warn: we tested 'old_rng' before and it was 'false' The problem is that sometimes we test "if (!old_rng)" and sometimes we test "if (must_register_misc)". The static checker knows they are equivalent but a human being reading the code could easily be confused. I have simplified the code by removing the "must_register_misc" variable and I have removed the redundant check on "if (!old_rng)". Signed-off-by: Dan Carpenter Reviewed-by: Rusty Russell Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index a0f7724852eb..cf49f1c88f01 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -302,7 +302,6 @@ err_misc_dereg: int hwrng_register(struct hwrng *rng) { - int must_register_misc; int err = -EINVAL; struct hwrng *old_rng, *tmp; @@ -327,7 +326,6 @@ int hwrng_register(struct hwrng *rng) goto out_unlock; } - must_register_misc = (current_rng == NULL); old_rng = current_rng; if (!old_rng) { err = hwrng_init(rng); @@ -336,13 +334,11 @@ int hwrng_register(struct hwrng *rng) current_rng = rng; } err = 0; - if (must_register_misc) { + if (!old_rng) { err = register_miscdev(); if (err) { - if (!old_rng) { - hwrng_cleanup(rng); - current_rng = NULL; - } + hwrng_cleanup(rng); + current_rng = NULL; goto out_unlock; } } From 883619a931e9f54fca7495321b339669f11cc727 Mon Sep 17 00:00:00 2001 From: Alex Porosanu Date: Thu, 6 Feb 2014 10:27:19 +0200 Subject: [PATCH 12/60] crypto: caam - fix ERA retrieval function SEC ERA has to be retrieved by reading the "fsl,sec-era" property from the device tree. This property is updated/filled in by u-boot. Signed-off-by: Alex Porosanu Reviewed-by: Horia Geanta Signed-off-by: Herbert Xu --- drivers/crypto/caam/ctrl.c | 36 ++++++++++-------------------------- drivers/crypto/caam/ctrl.h | 2 +- 2 files changed, 11 insertions(+), 27 deletions(-) diff --git a/drivers/crypto/caam/ctrl.c b/drivers/crypto/caam/ctrl.c index 30f434fd5dec..1c38f86bf63a 100644 --- a/drivers/crypto/caam/ctrl.c +++ b/drivers/crypto/caam/ctrl.c @@ -14,7 +14,6 @@ #include "jr.h" #include "desc_constr.h" #include "error.h" -#include "ctrl.h" /* * Descriptor to instantiate RNG State Handle 0 in normal mode and @@ -352,32 +351,17 @@ static void kick_trng(struct platform_device *pdev, int ent_delay) /** * caam_get_era() - Return the ERA of the SEC on SoC, based - * on the SEC_VID register. - * Returns the ERA number (1..4) or -ENOTSUPP if the ERA is unknown. - * @caam_id - the value of the SEC_VID register + * on "sec-era" propery in the DTS. This property is updated by u-boot. **/ -int caam_get_era(u64 caam_id) +int caam_get_era(void) { - struct sec_vid *sec_vid = (struct sec_vid *)&caam_id; - static const struct { - u16 ip_id; - u8 maj_rev; - u8 era; - } caam_eras[] = { - {0x0A10, 1, 1}, - {0x0A10, 2, 2}, - {0x0A12, 1, 3}, - {0x0A14, 1, 3}, - {0x0A14, 2, 4}, - {0x0A16, 1, 4}, - {0x0A11, 1, 4} - }; - int i; - - for (i = 0; i < ARRAY_SIZE(caam_eras); i++) - if (caam_eras[i].ip_id == sec_vid->ip_id && - caam_eras[i].maj_rev == sec_vid->maj_rev) - return caam_eras[i].era; + struct device_node *caam_node; + for_each_compatible_node(caam_node, NULL, "fsl,sec-v4.0") { + const uint32_t *prop = (uint32_t *)of_get_property(caam_node, + "fsl,sec-era", + NULL); + return prop ? *prop : -ENOTSUPP; + } return -ENOTSUPP; } @@ -551,7 +535,7 @@ static int caam_probe(struct platform_device *pdev) /* Report "alive" for developer to see */ dev_info(dev, "device ID = 0x%016llx (Era %d)\n", caam_id, - caam_get_era(caam_id)); + caam_get_era()); dev_info(dev, "job rings = %d, qi = %d\n", ctrlpriv->total_jobrs, ctrlpriv->qi_present); diff --git a/drivers/crypto/caam/ctrl.h b/drivers/crypto/caam/ctrl.h index 980d44eaaf40..cac5402a46eb 100644 --- a/drivers/crypto/caam/ctrl.h +++ b/drivers/crypto/caam/ctrl.h @@ -8,6 +8,6 @@ #define CTRL_H /* Prototypes for backend-level services exposed to APIs */ -int caam_get_era(u64 caam_id); +int caam_get_era(void); #endif /* CTRL_H */ From 06e5a1f29819759392239669beb2cad27059c8ec Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Thu, 23 Jan 2014 03:25:47 -0800 Subject: [PATCH 13/60] CRC32C: Add soft module dependency to load other accelerated crc32c modules We added the soft module dependency of crc32c module alias to generic crc32c module so other hardware accelerated crc32c modules could get loaded and used before the generic version. We also renamed the crypto/crc32c.c containing the generic crc32c crypto computation to crypto/crc32c_generic.c according to convention. Signed-off-by: Tim Chen Signed-off-by: Herbert Xu --- crypto/Makefile | 2 +- crypto/{crc32c.c => crc32c_generic.c} | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) rename crypto/{crc32c.c => crc32c_generic.c} (98%) diff --git a/crypto/Makefile b/crypto/Makefile index b29402a7b9b5..38e64231dcd3 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -81,7 +81,7 @@ obj-$(CONFIG_CRYPTO_SALSA20) += salsa20_generic.o obj-$(CONFIG_CRYPTO_DEFLATE) += deflate.o obj-$(CONFIG_CRYPTO_ZLIB) += zlib.o obj-$(CONFIG_CRYPTO_MICHAEL_MIC) += michael_mic.o -obj-$(CONFIG_CRYPTO_CRC32C) += crc32c.o +obj-$(CONFIG_CRYPTO_CRC32C) += crc32c_generic.o obj-$(CONFIG_CRYPTO_CRC32) += crc32.o obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_common.o crct10dif_generic.o obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o diff --git a/crypto/crc32c.c b/crypto/crc32c_generic.c similarity index 98% rename from crypto/crc32c.c rename to crypto/crc32c_generic.c index 06f7018c9d95..d9c7beba8e50 100644 --- a/crypto/crc32c.c +++ b/crypto/crc32c_generic.c @@ -170,3 +170,5 @@ module_exit(crc32c_mod_fini); MODULE_AUTHOR("Clay Haapala "); MODULE_DESCRIPTION("CRC32c (Castagnoli) calculations wrapper for lib/crc32c"); MODULE_LICENSE("GPL"); +MODULE_ALIAS("crc32c"); +MODULE_SOFTDEP("pre: crc32c"); From 32af1e180f53fd6c60355b24a5fa9bc45dfa6fcc Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 12 Feb 2014 13:28:59 +0900 Subject: [PATCH 14/60] crypto: picoxcell - Use devm_ioremap_resource() Use devm_ioremap_resource() in order to make the code simpler, and remove redundant return value check of platform_get_resource() because the value is checked by devm_ioremap_resource(). Signed-off-by: Jingoo Han Signed-off-by: Herbert Xu --- drivers/crypto/picoxcell_crypto.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/drivers/crypto/picoxcell_crypto.c b/drivers/crypto/picoxcell_crypto.c index a6175ba6d238..5da5b98b8f29 100644 --- a/drivers/crypto/picoxcell_crypto.c +++ b/drivers/crypto/picoxcell_crypto.c @@ -1720,22 +1720,16 @@ static int spacc_probe(struct platform_device *pdev) engine->name = dev_name(&pdev->dev); mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); + engine->regs = devm_ioremap_resource(&pdev->dev, mem); + if (IS_ERR(engine->regs)) + return PTR_ERR(engine->regs); + irq = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (!mem || !irq) { + if (!irq) { dev_err(&pdev->dev, "no memory/irq resource for engine\n"); return -ENXIO; } - if (!devm_request_mem_region(&pdev->dev, mem->start, resource_size(mem), - engine->name)) - return -ENOMEM; - - engine->regs = devm_ioremap(&pdev->dev, mem->start, resource_size(mem)); - if (!engine->regs) { - dev_err(&pdev->dev, "memory map failed\n"); - return -ENOMEM; - } - if (devm_request_irq(&pdev->dev, irq->start, spacc_spacc_irq, 0, engine->name, engine)) { dev_err(engine->dev, "failed to request IRQ\n"); From 9e95275cf351ebbb02316addfa2d8d87173a4cd7 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 12 Feb 2014 13:23:37 +0900 Subject: [PATCH 15/60] crypto: sahara - Use devm_ioremap_resource() Use devm_ioremap_resource() in order to make the code simpler, and remove redundant return value check of platform_get_resource() because the value is checked by devm_ioremap_resource(). Signed-off-by: Jingoo Han Signed-off-by: Herbert Xu --- drivers/crypto/sahara.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c index 785a9ded7bdf..894468fdb02d 100644 --- a/drivers/crypto/sahara.c +++ b/drivers/crypto/sahara.c @@ -885,22 +885,9 @@ static int sahara_probe(struct platform_device *pdev) /* Get the base address */ res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(&pdev->dev, "failed to get memory region resource\n"); - return -ENODEV; - } - - if (devm_request_mem_region(&pdev->dev, res->start, - resource_size(res), SAHARA_NAME) == NULL) { - dev_err(&pdev->dev, "failed to request memory region\n"); - return -ENOENT; - } - dev->regs_base = devm_ioremap(&pdev->dev, res->start, - resource_size(res)); - if (!dev->regs_base) { - dev_err(&pdev->dev, "failed to ioremap address region\n"); - return -ENOENT; - } + dev->regs_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(dev->regs_base)) + return PTR_ERR(dev->regs_base); /* Get the IRQ */ irq = platform_get_irq(pdev, 0); From 0fdefe2c907610f3efb3163c8b72127007e282aa Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 12 Feb 2014 13:24:57 +0900 Subject: [PATCH 16/60] crypto: s5p-sss - Use devm_ioremap_resource() Use devm_ioremap_resource() in order to make the code simpler, and remove redundant return value check of platform_get_resource() because the value is checked by devm_ioremap_resource(). Signed-off-by: Jingoo Han Signed-off-by: Herbert Xu --- drivers/crypto/s5p-sss.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/crypto/s5p-sss.c b/drivers/crypto/s5p-sss.c index cf149b19ff47..be45762f390a 100644 --- a/drivers/crypto/s5p-sss.c +++ b/drivers/crypto/s5p-sss.c @@ -568,17 +568,14 @@ static int s5p_aes_probe(struct platform_device *pdev) if (s5p_dev) return -EEXIST; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) - return -ENODEV; - pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); if (!pdata) return -ENOMEM; - if (!devm_request_mem_region(dev, res->start, - resource_size(res), pdev->name)) - return -EBUSY; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + pdata->ioaddr = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(pdata->ioaddr)) + return PTR_ERR(pdata->ioaddr); pdata->clk = devm_clk_get(dev, "secss"); if (IS_ERR(pdata->clk)) { @@ -589,8 +586,6 @@ static int s5p_aes_probe(struct platform_device *pdev) clk_enable(pdata->clk); spin_lock_init(&pdata->lock); - pdata->ioaddr = devm_ioremap(dev, res->start, - resource_size(res)); pdata->irq_hash = platform_get_irq_byname(pdev, "hash"); if (pdata->irq_hash < 0) { From bfaff75b09c42d8fc57b020034e836fe9048f8b1 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 12 Feb 2014 14:17:08 +0900 Subject: [PATCH 17/60] hwrng: atmel - Use devm_ioremap_resource() Use devm_ioremap_resource() in order to make the code simpler, and remove redundant return value check of platform_get_resource() because the value is checked by devm_ioremap_resource(). Signed-off-by: Jingoo Han Acked-by: Peter Korsgaard Acked-by: Nicolas Ferre Signed-off-by: Herbert Xu --- drivers/char/hw_random/atmel-rng.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c index bf9fc6b79328..dfeddf2c00b7 100644 --- a/drivers/char/hw_random/atmel-rng.c +++ b/drivers/char/hw_random/atmel-rng.c @@ -54,21 +54,14 @@ static int atmel_trng_probe(struct platform_device *pdev) struct resource *res; int ret; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) - return -EINVAL; - trng = devm_kzalloc(&pdev->dev, sizeof(*trng), GFP_KERNEL); if (!trng) return -ENOMEM; - if (!devm_request_mem_region(&pdev->dev, res->start, - resource_size(res), pdev->name)) - return -EBUSY; - - trng->base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); - if (!trng->base) - return -EBUSY; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + trng->base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(trng->base)) + return PTR_ERR(trng->base); trng->clk = clk_get(&pdev->dev, NULL); if (IS_ERR(trng->clk)) From fb5d23e3e09d6c9719b783ac1955c085aca3d4ae Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Wed, 12 Feb 2014 14:19:48 +0900 Subject: [PATCH 18/60] hwrng: pixocell - Use devm_ioremap_resource() Use devm_ioremap_resource() in order to make the code simpler, and remove redundant return value check of platform_get_resource() because the value is checked by devm_ioremap_resource(). Signed-off-by: Jingoo Han Signed-off-by: Herbert Xu --- drivers/char/hw_random/picoxcell-rng.c | 19 +++---------------- 1 file changed, 3 insertions(+), 16 deletions(-) diff --git a/drivers/char/hw_random/picoxcell-rng.c b/drivers/char/hw_random/picoxcell-rng.c index 3d4c2293c6f5..c03beeeb1aea 100644 --- a/drivers/char/hw_random/picoxcell-rng.c +++ b/drivers/char/hw_random/picoxcell-rng.c @@ -104,22 +104,9 @@ static int picoxcell_trng_probe(struct platform_device *pdev) int ret; struct resource *mem = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!mem) { - dev_warn(&pdev->dev, "no memory resource\n"); - return -ENOMEM; - } - - if (!devm_request_mem_region(&pdev->dev, mem->start, resource_size(mem), - "picoxcell_trng")) { - dev_warn(&pdev->dev, "unable to request io mem\n"); - return -EBUSY; - } - - rng_base = devm_ioremap(&pdev->dev, mem->start, resource_size(mem)); - if (!rng_base) { - dev_warn(&pdev->dev, "unable to remap io mem\n"); - return -ENOMEM; - } + rng_base = devm_ioremap_resource(&pdev->dev, mem); + if (IS_ERR(rng_base)) + return PTR_ERR(rng_base); rng_clk = clk_get(&pdev->dev, NULL); if (IS_ERR(rng_clk)) { From b3bd5869fd65f509d96a9fcb47cdea81163a811d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 13 Feb 2014 17:58:32 +0300 Subject: [PATCH 19/60] crypto: remove a duplicate checks in __cbc_decrypt() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We checked "nbytes < bsize" before so it can't happen here. Signed-off-by: Dan Carpenter Acked-by: Jussi Kivilinna Acked-by: Johannes Götzfried Signed-off-by: Herbert Xu --- arch/x86/crypto/blowfish_glue.c | 3 --- arch/x86/crypto/cast5_avx_glue.c | 3 --- 2 files changed, 6 deletions(-) diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index 50ec333b70e6..8af519ed73d1 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c @@ -223,9 +223,6 @@ static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, src -= 1; dst -= 1; } while (nbytes >= bsize * 4); - - if (nbytes < bsize) - goto done; } /* Handle leftovers */ diff --git a/arch/x86/crypto/cast5_avx_glue.c b/arch/x86/crypto/cast5_avx_glue.c index e6a3700489b9..e57e20ab5e0b 100644 --- a/arch/x86/crypto/cast5_avx_glue.c +++ b/arch/x86/crypto/cast5_avx_glue.c @@ -203,9 +203,6 @@ static unsigned int __cbc_decrypt(struct blkcipher_desc *desc, src -= 1; dst -= 1; } while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS); - - if (nbytes < bsize) - goto done; } /* Handle leftovers */ From d9588f874482c0163caf19c2acbe5ce6260d42bf Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Fri, 14 Feb 2014 01:04:44 -0200 Subject: [PATCH 20/60] crypto: mxs-dcp - Fix platform_get_irq() error handling We should test the error case for each platform_get_irq() assignment and propagate the error accordingly. Signed-off-by: Fabio Estevam Acked-by: Marek Vasut Signed-off-by: Herbert Xu --- drivers/crypto/mxs-dcp.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index 37e070670702..08761d61d4f5 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -908,9 +908,14 @@ static int mxs_dcp_probe(struct platform_device *pdev) iores = platform_get_resource(pdev, IORESOURCE_MEM, 0); dcp_vmi_irq = platform_get_irq(pdev, 0); + if (dcp_vmi_irq < 0) { + ret = dcp_vmi_irq; + goto err_mutex; + } + dcp_irq = platform_get_irq(pdev, 1); - if (dcp_vmi_irq < 0 || dcp_irq < 0) { - ret = -EINVAL; + if (dcp_irq < 0) { + ret = dcp_irq; goto err_mutex; } From e91aa9d50c3561c4a40b87d1d302db851f07ef31 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Fri, 14 Feb 2014 10:49:10 -0600 Subject: [PATCH 21/60] crypto: omap-des - Add omap-des driver for OMAP4/AM43xx Add omap-des driver with platform data for OMAP4/AM43xx. Support added for DES ECB and CBC modes. Also add support for 3DES operation where 3 64-bit keys are used to perform a DES encrypt-decrypt-encrypt (des3_ede) operation on a buffer. Tests have been conducted with the CRYPTO test manager, and functionality is verified at different page length alignments. Signed-off-by: Joel Fernandes Signed-off-by: Herbert Xu --- drivers/crypto/omap-des.c | 1218 +++++++++++++++++++++++++++++++++++++ 1 file changed, 1218 insertions(+) create mode 100644 drivers/crypto/omap-des.c diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c new file mode 100644 index 000000000000..006d9144485f --- /dev/null +++ b/drivers/crypto/omap-des.c @@ -0,0 +1,1218 @@ +/* + * Support for OMAP DES and Triple DES HW acceleration. + * + * Copyright (c) 2013 Texas Instruments Incorporated + * Author: Joel Fernandes + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + */ + +#define pr_fmt(fmt) "%s: " fmt, __func__ + +#ifdef DEBUG +#define prn(num) printk(#num "=%d\n", num) +#define prx(num) printk(#num "=%x\n", num) +#else +#define prn(num) do { } while (0) +#define prx(num) do { } while (0) +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DST_MAXBURST 2 + +#define DES_BLOCK_WORDS (DES_BLOCK_SIZE >> 2) + +#define _calc_walked(inout) (dd->inout##_walk.offset - dd->inout##_sg->offset) + +#define DES_REG_KEY(dd, x) ((dd)->pdata->key_ofs - \ + ((x ^ 0x01) * 0x04)) + +#define DES_REG_IV(dd, x) ((dd)->pdata->iv_ofs + ((x) * 0x04)) + +#define DES_REG_CTRL(dd) ((dd)->pdata->ctrl_ofs) +#define DES_REG_CTRL_CBC BIT(4) +#define DES_REG_CTRL_TDES BIT(3) +#define DES_REG_CTRL_DIRECTION BIT(2) +#define DES_REG_CTRL_INPUT_READY BIT(1) +#define DES_REG_CTRL_OUTPUT_READY BIT(0) + +#define DES_REG_DATA_N(dd, x) ((dd)->pdata->data_ofs + ((x) * 0x04)) + +#define DES_REG_REV(dd) ((dd)->pdata->rev_ofs) + +#define DES_REG_MASK(dd) ((dd)->pdata->mask_ofs) + +#define DES_REG_LENGTH_N(x) (0x24 + ((x) * 0x04)) + +#define DES_REG_IRQ_STATUS(dd) ((dd)->pdata->irq_status_ofs) +#define DES_REG_IRQ_ENABLE(dd) ((dd)->pdata->irq_enable_ofs) +#define DES_REG_IRQ_DATA_IN BIT(1) +#define DES_REG_IRQ_DATA_OUT BIT(2) + +#define FLAGS_MODE_MASK 0x000f +#define FLAGS_ENCRYPT BIT(0) +#define FLAGS_CBC BIT(1) +#define FLAGS_INIT BIT(4) +#define FLAGS_BUSY BIT(6) + +struct omap_des_ctx { + struct omap_des_dev *dd; + + int keylen; + u32 key[(3 * DES_KEY_SIZE) / sizeof(u32)]; + unsigned long flags; +}; + +struct omap_des_reqctx { + unsigned long mode; +}; + +#define OMAP_DES_QUEUE_LENGTH 1 +#define OMAP_DES_CACHE_SIZE 0 + +struct omap_des_algs_info { + struct crypto_alg *algs_list; + unsigned int size; + unsigned int registered; +}; + +struct omap_des_pdata { + struct omap_des_algs_info *algs_info; + unsigned int algs_info_size; + + void (*trigger)(struct omap_des_dev *dd, int length); + + u32 key_ofs; + u32 iv_ofs; + u32 ctrl_ofs; + u32 data_ofs; + u32 rev_ofs; + u32 mask_ofs; + u32 irq_enable_ofs; + u32 irq_status_ofs; + + u32 dma_enable_in; + u32 dma_enable_out; + u32 dma_start; + + u32 major_mask; + u32 major_shift; + u32 minor_mask; + u32 minor_shift; +}; + +struct omap_des_dev { + struct list_head list; + unsigned long phys_base; + void __iomem *io_base; + struct omap_des_ctx *ctx; + struct device *dev; + unsigned long flags; + int err; + + /* spinlock used for queues */ + spinlock_t lock; + struct crypto_queue queue; + + struct tasklet_struct done_task; + struct tasklet_struct queue_task; + + struct ablkcipher_request *req; + /* + * total is used by PIO mode for book keeping so introduce + * variable total_save as need it to calc page_order + */ + size_t total; + size_t total_save; + + struct scatterlist *in_sg; + struct scatterlist *out_sg; + + /* Buffers for copying for unaligned cases */ + struct scatterlist in_sgl; + struct scatterlist out_sgl; + struct scatterlist *orig_out; + int sgs_copied; + + struct scatter_walk in_walk; + struct scatter_walk out_walk; + int dma_in; + struct dma_chan *dma_lch_in; + int dma_out; + struct dma_chan *dma_lch_out; + int in_sg_len; + int out_sg_len; + int pio_only; + const struct omap_des_pdata *pdata; +}; + +/* keep registered devices data here */ +static LIST_HEAD(dev_list); +static DEFINE_SPINLOCK(list_lock); + +#ifdef DEBUG +#define omap_des_read(dd, offset) \ + ({ \ + int _read_ret; \ + _read_ret = __raw_readl(dd->io_base + offset); \ + pr_err("omap_des_read(" #offset "=%#x)= %#x\n", \ + offset, _read_ret); \ + _read_ret; \ + }) +#else +static inline u32 omap_des_read(struct omap_des_dev *dd, u32 offset) +{ + return __raw_readl(dd->io_base + offset); +} +#endif + +#ifdef DEBUG +#define omap_des_write(dd, offset, value) \ + do { \ + pr_err("omap_des_write(" #offset "=%#x) value=%#x\n", \ + offset, value); \ + __raw_writel(value, dd->io_base + offset); \ + } while (0) +#else +static inline void omap_des_write(struct omap_des_dev *dd, u32 offset, + u32 value) +{ + __raw_writel(value, dd->io_base + offset); +} +#endif + +static inline void omap_des_write_mask(struct omap_des_dev *dd, u32 offset, + u32 value, u32 mask) +{ + u32 val; + + val = omap_des_read(dd, offset); + val &= ~mask; + val |= value; + omap_des_write(dd, offset, val); +} + +static void omap_des_write_n(struct omap_des_dev *dd, u32 offset, + u32 *value, int count) +{ + for (; count--; value++, offset += 4) + omap_des_write(dd, offset, *value); +} + +static int omap_des_hw_init(struct omap_des_dev *dd) +{ + /* + * clocks are enabled when request starts and disabled when finished. + * It may be long delays between requests. + * Device might go to off mode to save power. + */ + pm_runtime_get_sync(dd->dev); + + if (!(dd->flags & FLAGS_INIT)) { + dd->flags |= FLAGS_INIT; + dd->err = 0; + } + + return 0; +} + +static int omap_des_write_ctrl(struct omap_des_dev *dd) +{ + unsigned int key32; + int i, err; + u32 val = 0, mask = 0; + + err = omap_des_hw_init(dd); + if (err) + return err; + + key32 = dd->ctx->keylen / sizeof(u32); + + /* it seems a key should always be set even if it has not changed */ + for (i = 0; i < key32; i++) { + omap_des_write(dd, DES_REG_KEY(dd, i), + __le32_to_cpu(dd->ctx->key[i])); + } + + if ((dd->flags & FLAGS_CBC) && dd->req->info) + omap_des_write_n(dd, DES_REG_IV(dd, 0), dd->req->info, 2); + + if (dd->flags & FLAGS_CBC) + val |= DES_REG_CTRL_CBC; + if (dd->flags & FLAGS_ENCRYPT) + val |= DES_REG_CTRL_DIRECTION; + if (key32 == 6) + val |= DES_REG_CTRL_TDES; + + mask |= DES_REG_CTRL_CBC | DES_REG_CTRL_DIRECTION | DES_REG_CTRL_TDES; + + omap_des_write_mask(dd, DES_REG_CTRL(dd), val, mask); + + return 0; +} + +static void omap_des_dma_trigger_omap4(struct omap_des_dev *dd, int length) +{ + u32 mask, val; + + omap_des_write(dd, DES_REG_LENGTH_N(0), length); + + val = dd->pdata->dma_start; + + if (dd->dma_lch_out != NULL) + val |= dd->pdata->dma_enable_out; + if (dd->dma_lch_in != NULL) + val |= dd->pdata->dma_enable_in; + + mask = dd->pdata->dma_enable_out | dd->pdata->dma_enable_in | + dd->pdata->dma_start; + + omap_des_write_mask(dd, DES_REG_MASK(dd), val, mask); +} + +static void omap_des_dma_stop(struct omap_des_dev *dd) +{ + u32 mask; + + mask = dd->pdata->dma_enable_out | dd->pdata->dma_enable_in | + dd->pdata->dma_start; + + omap_des_write_mask(dd, DES_REG_MASK(dd), 0, mask); +} + +static struct omap_des_dev *omap_des_find_dev(struct omap_des_ctx *ctx) +{ + struct omap_des_dev *dd = NULL, *tmp; + + spin_lock_bh(&list_lock); + if (!ctx->dd) { + list_for_each_entry(tmp, &dev_list, list) { + /* FIXME: take fist available des core */ + dd = tmp; + break; + } + ctx->dd = dd; + } else { + /* already found before */ + dd = ctx->dd; + } + spin_unlock_bh(&list_lock); + + return dd; +} + +static void omap_des_dma_out_callback(void *data) +{ + struct omap_des_dev *dd = data; + + /* dma_lch_out - completed */ + tasklet_schedule(&dd->done_task); +} + +static int omap_des_dma_init(struct omap_des_dev *dd) +{ + int err = -ENOMEM; + dma_cap_mask_t mask; + + dd->dma_lch_out = NULL; + dd->dma_lch_in = NULL; + + dma_cap_zero(mask); + dma_cap_set(DMA_SLAVE, mask); + + dd->dma_lch_in = dma_request_slave_channel_compat(mask, + omap_dma_filter_fn, + &dd->dma_in, + dd->dev, "rx"); + if (!dd->dma_lch_in) { + dev_err(dd->dev, "Unable to request in DMA channel\n"); + goto err_dma_in; + } + + dd->dma_lch_out = dma_request_slave_channel_compat(mask, + omap_dma_filter_fn, + &dd->dma_out, + dd->dev, "tx"); + if (!dd->dma_lch_out) { + dev_err(dd->dev, "Unable to request out DMA channel\n"); + goto err_dma_out; + } + + return 0; + +err_dma_out: + dma_release_channel(dd->dma_lch_in); +err_dma_in: + if (err) + pr_err("error: %d\n", err); + return err; +} + +static void omap_des_dma_cleanup(struct omap_des_dev *dd) +{ + dma_release_channel(dd->dma_lch_out); + dma_release_channel(dd->dma_lch_in); +} + +static void sg_copy_buf(void *buf, struct scatterlist *sg, + unsigned int start, unsigned int nbytes, int out) +{ + struct scatter_walk walk; + + if (!nbytes) + return; + + scatterwalk_start(&walk, sg); + scatterwalk_advance(&walk, start); + scatterwalk_copychunks(buf, &walk, nbytes, out); + scatterwalk_done(&walk, out, 0); +} + +static int omap_des_crypt_dma(struct crypto_tfm *tfm, + struct scatterlist *in_sg, struct scatterlist *out_sg, + int in_sg_len, int out_sg_len) +{ + struct omap_des_ctx *ctx = crypto_tfm_ctx(tfm); + struct omap_des_dev *dd = ctx->dd; + struct dma_async_tx_descriptor *tx_in, *tx_out; + struct dma_slave_config cfg; + int ret; + + if (dd->pio_only) { + scatterwalk_start(&dd->in_walk, dd->in_sg); + scatterwalk_start(&dd->out_walk, dd->out_sg); + + /* Enable DATAIN interrupt and let it take + care of the rest */ + omap_des_write(dd, DES_REG_IRQ_ENABLE(dd), 0x2); + return 0; + } + + dma_sync_sg_for_device(dd->dev, dd->in_sg, in_sg_len, DMA_TO_DEVICE); + + memset(&cfg, 0, sizeof(cfg)); + + cfg.src_addr = dd->phys_base + DES_REG_DATA_N(dd, 0); + cfg.dst_addr = dd->phys_base + DES_REG_DATA_N(dd, 0); + cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + cfg.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; + cfg.src_maxburst = DST_MAXBURST; + cfg.dst_maxburst = DST_MAXBURST; + + /* IN */ + ret = dmaengine_slave_config(dd->dma_lch_in, &cfg); + if (ret) { + dev_err(dd->dev, "can't configure IN dmaengine slave: %d\n", + ret); + return ret; + } + + tx_in = dmaengine_prep_slave_sg(dd->dma_lch_in, in_sg, in_sg_len, + DMA_MEM_TO_DEV, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!tx_in) { + dev_err(dd->dev, "IN prep_slave_sg() failed\n"); + return -EINVAL; + } + + /* No callback necessary */ + tx_in->callback_param = dd; + + /* OUT */ + ret = dmaengine_slave_config(dd->dma_lch_out, &cfg); + if (ret) { + dev_err(dd->dev, "can't configure OUT dmaengine slave: %d\n", + ret); + return ret; + } + + tx_out = dmaengine_prep_slave_sg(dd->dma_lch_out, out_sg, out_sg_len, + DMA_DEV_TO_MEM, + DMA_PREP_INTERRUPT | DMA_CTRL_ACK); + if (!tx_out) { + dev_err(dd->dev, "OUT prep_slave_sg() failed\n"); + return -EINVAL; + } + + tx_out->callback = omap_des_dma_out_callback; + tx_out->callback_param = dd; + + dmaengine_submit(tx_in); + dmaengine_submit(tx_out); + + dma_async_issue_pending(dd->dma_lch_in); + dma_async_issue_pending(dd->dma_lch_out); + + /* start DMA */ + dd->pdata->trigger(dd, dd->total); + + return 0; +} + +static int omap_des_crypt_dma_start(struct omap_des_dev *dd) +{ + struct crypto_tfm *tfm = crypto_ablkcipher_tfm( + crypto_ablkcipher_reqtfm(dd->req)); + int err; + + pr_debug("total: %d\n", dd->total); + + if (!dd->pio_only) { + err = dma_map_sg(dd->dev, dd->in_sg, dd->in_sg_len, + DMA_TO_DEVICE); + if (!err) { + dev_err(dd->dev, "dma_map_sg() error\n"); + return -EINVAL; + } + + err = dma_map_sg(dd->dev, dd->out_sg, dd->out_sg_len, + DMA_FROM_DEVICE); + if (!err) { + dev_err(dd->dev, "dma_map_sg() error\n"); + return -EINVAL; + } + } + + err = omap_des_crypt_dma(tfm, dd->in_sg, dd->out_sg, dd->in_sg_len, + dd->out_sg_len); + if (err && !dd->pio_only) { + dma_unmap_sg(dd->dev, dd->in_sg, dd->in_sg_len, DMA_TO_DEVICE); + dma_unmap_sg(dd->dev, dd->out_sg, dd->out_sg_len, + DMA_FROM_DEVICE); + } + + return err; +} + +static void omap_des_finish_req(struct omap_des_dev *dd, int err) +{ + struct ablkcipher_request *req = dd->req; + + pr_debug("err: %d\n", err); + + pm_runtime_put(dd->dev); + dd->flags &= ~FLAGS_BUSY; + + req->base.complete(&req->base, err); +} + +static int omap_des_crypt_dma_stop(struct omap_des_dev *dd) +{ + int err = 0; + + pr_debug("total: %d\n", dd->total); + + omap_des_dma_stop(dd); + + dmaengine_terminate_all(dd->dma_lch_in); + dmaengine_terminate_all(dd->dma_lch_out); + + dma_unmap_sg(dd->dev, dd->in_sg, dd->in_sg_len, DMA_TO_DEVICE); + dma_unmap_sg(dd->dev, dd->out_sg, dd->out_sg_len, DMA_FROM_DEVICE); + + return err; +} + +int omap_des_copy_needed(struct scatterlist *sg) +{ + while (sg) { + if (!IS_ALIGNED(sg->offset, 4)) + return -1; + if (!IS_ALIGNED(sg->length, DES_BLOCK_SIZE)) + return -1; + sg = sg_next(sg); + } + return 0; +} + +int omap_des_copy_sgs(struct omap_des_dev *dd) +{ + void *buf_in, *buf_out; + int pages; + + pages = dd->total >> PAGE_SHIFT; + + if (dd->total & (PAGE_SIZE-1)) + pages++; + + BUG_ON(!pages); + + buf_in = (void *)__get_free_pages(GFP_ATOMIC, pages); + buf_out = (void *)__get_free_pages(GFP_ATOMIC, pages); + + if (!buf_in || !buf_out) { + pr_err("Couldn't allocated pages for unaligned cases.\n"); + return -1; + } + + dd->orig_out = dd->out_sg; + + sg_copy_buf(buf_in, dd->in_sg, 0, dd->total, 0); + + sg_init_table(&dd->in_sgl, 1); + sg_set_buf(&dd->in_sgl, buf_in, dd->total); + dd->in_sg = &dd->in_sgl; + + sg_init_table(&dd->out_sgl, 1); + sg_set_buf(&dd->out_sgl, buf_out, dd->total); + dd->out_sg = &dd->out_sgl; + + return 0; +} + +static int omap_des_handle_queue(struct omap_des_dev *dd, + struct ablkcipher_request *req) +{ + struct crypto_async_request *async_req, *backlog; + struct omap_des_ctx *ctx; + struct omap_des_reqctx *rctx; + unsigned long flags; + int err, ret = 0; + + spin_lock_irqsave(&dd->lock, flags); + if (req) + ret = ablkcipher_enqueue_request(&dd->queue, req); + if (dd->flags & FLAGS_BUSY) { + spin_unlock_irqrestore(&dd->lock, flags); + return ret; + } + backlog = crypto_get_backlog(&dd->queue); + async_req = crypto_dequeue_request(&dd->queue); + if (async_req) + dd->flags |= FLAGS_BUSY; + spin_unlock_irqrestore(&dd->lock, flags); + + if (!async_req) + return ret; + + if (backlog) + backlog->complete(backlog, -EINPROGRESS); + + req = ablkcipher_request_cast(async_req); + + /* assign new request to device */ + dd->req = req; + dd->total = req->nbytes; + dd->total_save = req->nbytes; + dd->in_sg = req->src; + dd->out_sg = req->dst; + + if (omap_des_copy_needed(dd->in_sg) || + omap_des_copy_needed(dd->out_sg)) { + if (omap_des_copy_sgs(dd)) + pr_err("Failed to copy SGs for unaligned cases\n"); + dd->sgs_copied = 1; + } else { + dd->sgs_copied = 0; + } + + dd->in_sg_len = scatterwalk_bytes_sglen(dd->in_sg, dd->total); + dd->out_sg_len = scatterwalk_bytes_sglen(dd->out_sg, dd->total); + BUG_ON(dd->in_sg_len < 0 || dd->out_sg_len < 0); + + rctx = ablkcipher_request_ctx(req); + ctx = crypto_ablkcipher_ctx(crypto_ablkcipher_reqtfm(req)); + rctx->mode &= FLAGS_MODE_MASK; + dd->flags = (dd->flags & ~FLAGS_MODE_MASK) | rctx->mode; + + dd->ctx = ctx; + ctx->dd = dd; + + err = omap_des_write_ctrl(dd); + if (!err) + err = omap_des_crypt_dma_start(dd); + if (err) { + /* des_task will not finish it, so do it here */ + omap_des_finish_req(dd, err); + tasklet_schedule(&dd->queue_task); + } + + return ret; /* return ret, which is enqueue return value */ +} + +static void omap_des_done_task(unsigned long data) +{ + struct omap_des_dev *dd = (struct omap_des_dev *)data; + void *buf_in, *buf_out; + int pages; + + pr_debug("enter done_task\n"); + + if (!dd->pio_only) { + dma_sync_sg_for_device(dd->dev, dd->out_sg, dd->out_sg_len, + DMA_FROM_DEVICE); + dma_unmap_sg(dd->dev, dd->in_sg, dd->in_sg_len, DMA_TO_DEVICE); + dma_unmap_sg(dd->dev, dd->out_sg, dd->out_sg_len, + DMA_FROM_DEVICE); + omap_des_crypt_dma_stop(dd); + } + + if (dd->sgs_copied) { + buf_in = sg_virt(&dd->in_sgl); + buf_out = sg_virt(&dd->out_sgl); + + sg_copy_buf(buf_out, dd->orig_out, 0, dd->total_save, 1); + + pages = get_order(dd->total_save); + free_pages((unsigned long)buf_in, pages); + free_pages((unsigned long)buf_out, pages); + } + + omap_des_finish_req(dd, 0); + omap_des_handle_queue(dd, NULL); + + pr_debug("exit\n"); +} + +static void omap_des_queue_task(unsigned long data) +{ + struct omap_des_dev *dd = (struct omap_des_dev *)data; + + omap_des_handle_queue(dd, NULL); +} + +static int omap_des_crypt(struct ablkcipher_request *req, unsigned long mode) +{ + struct omap_des_ctx *ctx = crypto_ablkcipher_ctx( + crypto_ablkcipher_reqtfm(req)); + struct omap_des_reqctx *rctx = ablkcipher_request_ctx(req); + struct omap_des_dev *dd; + + pr_debug("nbytes: %d, enc: %d, cbc: %d\n", req->nbytes, + !!(mode & FLAGS_ENCRYPT), + !!(mode & FLAGS_CBC)); + + if (!IS_ALIGNED(req->nbytes, DES_BLOCK_SIZE)) { + pr_err("request size is not exact amount of DES blocks\n"); + return -EINVAL; + } + + dd = omap_des_find_dev(ctx); + if (!dd) + return -ENODEV; + + rctx->mode = mode; + + return omap_des_handle_queue(dd, req); +} + +/* ********************** ALG API ************************************ */ + +static int omap_des_setkey(struct crypto_ablkcipher *tfm, const u8 *key, + unsigned int keylen) +{ + struct omap_des_ctx *ctx = crypto_ablkcipher_ctx(tfm); + + if (keylen != DES_KEY_SIZE && keylen != (3*DES_KEY_SIZE)) + return -EINVAL; + + pr_debug("enter, keylen: %d\n", keylen); + + memcpy(ctx->key, key, keylen); + ctx->keylen = keylen; + + return 0; +} + +static int omap_des_ecb_encrypt(struct ablkcipher_request *req) +{ + return omap_des_crypt(req, FLAGS_ENCRYPT); +} + +static int omap_des_ecb_decrypt(struct ablkcipher_request *req) +{ + return omap_des_crypt(req, 0); +} + +static int omap_des_cbc_encrypt(struct ablkcipher_request *req) +{ + return omap_des_crypt(req, FLAGS_ENCRYPT | FLAGS_CBC); +} + +static int omap_des_cbc_decrypt(struct ablkcipher_request *req) +{ + return omap_des_crypt(req, FLAGS_CBC); +} + +static int omap_des_cra_init(struct crypto_tfm *tfm) +{ + pr_debug("enter\n"); + + tfm->crt_ablkcipher.reqsize = sizeof(struct omap_des_reqctx); + + return 0; +} + +static void omap_des_cra_exit(struct crypto_tfm *tfm) +{ + pr_debug("enter\n"); +} + +/* ********************** ALGS ************************************ */ + +static struct crypto_alg algs_ecb_cbc[] = { +{ + .cra_name = "ecb(des)", + .cra_driver_name = "ecb-des-omap", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_ASYNC, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct omap_des_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = omap_des_cra_init, + .cra_exit = omap_des_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .setkey = omap_des_setkey, + .encrypt = omap_des_ecb_encrypt, + .decrypt = omap_des_ecb_decrypt, + } +}, +{ + .cra_name = "cbc(des)", + .cra_driver_name = "cbc-des-omap", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_ASYNC, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct omap_des_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = omap_des_cra_init, + .cra_exit = omap_des_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = DES_KEY_SIZE, + .max_keysize = DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = omap_des_setkey, + .encrypt = omap_des_cbc_encrypt, + .decrypt = omap_des_cbc_decrypt, + } +}, +{ + .cra_name = "ecb(des3_ede)", + .cra_driver_name = "ecb-des3-omap", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_ASYNC, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct omap_des_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = omap_des_cra_init, + .cra_exit = omap_des_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = 3*DES_KEY_SIZE, + .max_keysize = 3*DES_KEY_SIZE, + .setkey = omap_des_setkey, + .encrypt = omap_des_ecb_encrypt, + .decrypt = omap_des_ecb_decrypt, + } +}, +{ + .cra_name = "cbc(des3_ede)", + .cra_driver_name = "cbc-des3-omap", + .cra_priority = 100, + .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | + CRYPTO_ALG_KERN_DRIVER_ONLY | + CRYPTO_ALG_ASYNC, + .cra_blocksize = DES_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct omap_des_ctx), + .cra_alignmask = 0, + .cra_type = &crypto_ablkcipher_type, + .cra_module = THIS_MODULE, + .cra_init = omap_des_cra_init, + .cra_exit = omap_des_cra_exit, + .cra_u.ablkcipher = { + .min_keysize = 3*DES_KEY_SIZE, + .max_keysize = 3*DES_KEY_SIZE, + .ivsize = DES_BLOCK_SIZE, + .setkey = omap_des_setkey, + .encrypt = omap_des_cbc_encrypt, + .decrypt = omap_des_cbc_decrypt, + } +} +}; + +static struct omap_des_algs_info omap_des_algs_info_ecb_cbc[] = { + { + .algs_list = algs_ecb_cbc, + .size = ARRAY_SIZE(algs_ecb_cbc), + }, +}; + +#ifdef CONFIG_OF +static const struct omap_des_pdata omap_des_pdata_omap4 = { + .algs_info = omap_des_algs_info_ecb_cbc, + .algs_info_size = ARRAY_SIZE(omap_des_algs_info_ecb_cbc), + .trigger = omap_des_dma_trigger_omap4, + .key_ofs = 0x14, + .iv_ofs = 0x18, + .ctrl_ofs = 0x20, + .data_ofs = 0x28, + .rev_ofs = 0x30, + .mask_ofs = 0x34, + .irq_status_ofs = 0x3c, + .irq_enable_ofs = 0x40, + .dma_enable_in = BIT(5), + .dma_enable_out = BIT(6), + .major_mask = 0x0700, + .major_shift = 8, + .minor_mask = 0x003f, + .minor_shift = 0, +}; + +static irqreturn_t omap_des_irq(int irq, void *dev_id) +{ + struct omap_des_dev *dd = dev_id; + u32 status, i; + u32 *src, *dst; + + status = omap_des_read(dd, DES_REG_IRQ_STATUS(dd)); + if (status & DES_REG_IRQ_DATA_IN) { + omap_des_write(dd, DES_REG_IRQ_ENABLE(dd), 0x0); + + BUG_ON(!dd->in_sg); + + BUG_ON(_calc_walked(in) > dd->in_sg->length); + + src = sg_virt(dd->in_sg) + _calc_walked(in); + + for (i = 0; i < DES_BLOCK_WORDS; i++) { + omap_des_write(dd, DES_REG_DATA_N(dd, i), *src); + + scatterwalk_advance(&dd->in_walk, 4); + if (dd->in_sg->length == _calc_walked(in)) { + dd->in_sg = scatterwalk_sg_next(dd->in_sg); + if (dd->in_sg) { + scatterwalk_start(&dd->in_walk, + dd->in_sg); + src = sg_virt(dd->in_sg) + + _calc_walked(in); + } + } else { + src++; + } + } + + /* Clear IRQ status */ + status &= ~DES_REG_IRQ_DATA_IN; + omap_des_write(dd, DES_REG_IRQ_STATUS(dd), status); + + /* Enable DATA_OUT interrupt */ + omap_des_write(dd, DES_REG_IRQ_ENABLE(dd), 0x4); + + } else if (status & DES_REG_IRQ_DATA_OUT) { + omap_des_write(dd, DES_REG_IRQ_ENABLE(dd), 0x0); + + BUG_ON(!dd->out_sg); + + BUG_ON(_calc_walked(out) > dd->out_sg->length); + + dst = sg_virt(dd->out_sg) + _calc_walked(out); + + for (i = 0; i < DES_BLOCK_WORDS; i++) { + *dst = omap_des_read(dd, DES_REG_DATA_N(dd, i)); + scatterwalk_advance(&dd->out_walk, 4); + if (dd->out_sg->length == _calc_walked(out)) { + dd->out_sg = scatterwalk_sg_next(dd->out_sg); + if (dd->out_sg) { + scatterwalk_start(&dd->out_walk, + dd->out_sg); + dst = sg_virt(dd->out_sg) + + _calc_walked(out); + } + } else { + dst++; + } + } + + dd->total -= DES_BLOCK_SIZE; + + BUG_ON(dd->total < 0); + + /* Clear IRQ status */ + status &= ~DES_REG_IRQ_DATA_OUT; + omap_des_write(dd, DES_REG_IRQ_STATUS(dd), status); + + if (!dd->total) + /* All bytes read! */ + tasklet_schedule(&dd->done_task); + else + /* Enable DATA_IN interrupt for next block */ + omap_des_write(dd, DES_REG_IRQ_ENABLE(dd), 0x2); + } + + return IRQ_HANDLED; +} + +static const struct of_device_id omap_des_of_match[] = { + { + .compatible = "ti,omap4-des", + .data = &omap_des_pdata_omap4, + }, + {}, +}; +MODULE_DEVICE_TABLE(of, omap_des_of_match); + +static int omap_des_get_of(struct omap_des_dev *dd, + struct platform_device *pdev) +{ + const struct of_device_id *match; + + match = of_match_device(of_match_ptr(omap_des_of_match), &pdev->dev); + if (!match) { + dev_err(&pdev->dev, "no compatible OF match\n"); + return -EINVAL; + } + + dd->dma_out = -1; /* Dummy value that's unused */ + dd->dma_in = -1; /* Dummy value that's unused */ + dd->pdata = match->data; + + return 0; +} +#else +static int omap_des_get_of(struct omap_des_dev *dd, + struct device *dev) +{ + return -EINVAL; +} +#endif + +static int omap_des_get_pdev(struct omap_des_dev *dd, + struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct resource *r; + int err = 0; + + /* Get the DMA out channel */ + r = platform_get_resource(pdev, IORESOURCE_DMA, 0); + if (!r) { + dev_err(dev, "no DMA out resource info\n"); + err = -ENODEV; + goto err; + } + dd->dma_out = r->start; + + /* Get the DMA in channel */ + r = platform_get_resource(pdev, IORESOURCE_DMA, 1); + if (!r) { + dev_err(dev, "no DMA in resource info\n"); + err = -ENODEV; + goto err; + } + dd->dma_in = r->start; + + /* non-DT devices get pdata from pdev */ + dd->pdata = pdev->dev.platform_data; + +err: + return err; +} + +static int omap_des_probe(struct platform_device *pdev) +{ + struct device *dev = &pdev->dev; + struct omap_des_dev *dd; + struct crypto_alg *algp; + struct resource *res; + int err = -ENOMEM, i, j, irq = -1; + u32 reg; + + dd = devm_kzalloc(dev, sizeof(struct omap_des_dev), GFP_KERNEL); + if (dd == NULL) { + dev_err(dev, "unable to alloc data struct.\n"); + goto err_data; + } + dd->dev = dev; + platform_set_drvdata(pdev, dd); + + spin_lock_init(&dd->lock); + crypto_init_queue(&dd->queue, OMAP_DES_QUEUE_LENGTH); + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + if (!res) { + dev_err(dev, "no MEM resource info\n"); + goto err_res; + } + + err = (dev->of_node) ? omap_des_get_of(dd, pdev) : + omap_des_get_pdev(dd, pdev); + if (err) + goto err_res; + + dd->io_base = devm_request_and_ioremap(dev, res); + if (!dd->io_base) { + dev_err(dev, "can't ioremap\n"); + err = -ENOMEM; + goto err_res; + } + dd->phys_base = res->start; + + pm_runtime_enable(dev); + pm_runtime_get_sync(dev); + + omap_des_dma_stop(dd); + + reg = omap_des_read(dd, DES_REG_REV(dd)); + + pm_runtime_put_sync(dev); + + dev_info(dev, "OMAP DES hw accel rev: %u.%u\n", + (reg & dd->pdata->major_mask) >> dd->pdata->major_shift, + (reg & dd->pdata->minor_mask) >> dd->pdata->minor_shift); + + tasklet_init(&dd->done_task, omap_des_done_task, (unsigned long)dd); + tasklet_init(&dd->queue_task, omap_des_queue_task, (unsigned long)dd); + + err = omap_des_dma_init(dd); + if (err && DES_REG_IRQ_STATUS(dd) && DES_REG_IRQ_ENABLE(dd)) { + dd->pio_only = 1; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) { + dev_err(dev, "can't get IRQ resource\n"); + goto err_irq; + } + + err = devm_request_irq(dev, irq, omap_des_irq, 0, + dev_name(dev), dd); + if (err) { + dev_err(dev, "Unable to grab omap-des IRQ\n"); + goto err_irq; + } + } + + + INIT_LIST_HEAD(&dd->list); + spin_lock(&list_lock); + list_add_tail(&dd->list, &dev_list); + spin_unlock(&list_lock); + + for (i = 0; i < dd->pdata->algs_info_size; i++) { + for (j = 0; j < dd->pdata->algs_info[i].size; j++) { + algp = &dd->pdata->algs_info[i].algs_list[j]; + + pr_debug("reg alg: %s\n", algp->cra_name); + INIT_LIST_HEAD(&algp->cra_list); + + err = crypto_register_alg(algp); + if (err) + goto err_algs; + + dd->pdata->algs_info[i].registered++; + } + } + + return 0; +err_algs: + for (i = dd->pdata->algs_info_size - 1; i >= 0; i--) + for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) + crypto_unregister_alg( + &dd->pdata->algs_info[i].algs_list[j]); + if (!dd->pio_only) + omap_des_dma_cleanup(dd); +err_irq: + tasklet_kill(&dd->done_task); + tasklet_kill(&dd->queue_task); + pm_runtime_disable(dev); +err_res: + dd = NULL; +err_data: + dev_err(dev, "initialization failed.\n"); + return err; +} + +static int omap_des_remove(struct platform_device *pdev) +{ + struct omap_des_dev *dd = platform_get_drvdata(pdev); + int i, j; + + if (!dd) + return -ENODEV; + + spin_lock(&list_lock); + list_del(&dd->list); + spin_unlock(&list_lock); + + for (i = dd->pdata->algs_info_size - 1; i >= 0; i--) + for (j = dd->pdata->algs_info[i].registered - 1; j >= 0; j--) + crypto_unregister_alg( + &dd->pdata->algs_info[i].algs_list[j]); + + tasklet_kill(&dd->done_task); + tasklet_kill(&dd->queue_task); + omap_des_dma_cleanup(dd); + pm_runtime_disable(dd->dev); + dd = NULL; + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int omap_des_suspend(struct device *dev) +{ + pm_runtime_put_sync(dev); + return 0; +} + +static int omap_des_resume(struct device *dev) +{ + pm_runtime_get_sync(dev); + return 0; +} +#endif + +static const struct dev_pm_ops omap_des_pm_ops = { + SET_SYSTEM_SLEEP_PM_OPS(omap_des_suspend, omap_des_resume) +}; + +static struct platform_driver omap_des_driver = { + .probe = omap_des_probe, + .remove = omap_des_remove, + .driver = { + .name = "omap-des", + .owner = THIS_MODULE, + .pm = &omap_des_pm_ops, + .of_match_table = of_match_ptr(omap_des_of_match), + }, +}; + +module_platform_driver(omap_des_driver); + +MODULE_DESCRIPTION("OMAP DES hw acceleration support."); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Joel Fernandes "); From 701d0f1940925d40f6b8a40f6c278d14be71402b Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Fri, 14 Feb 2014 10:49:47 -0600 Subject: [PATCH 22/60] crypto: omap-des - Add config and build options Add config and build options for the omap-des driver. Signed-off-by: Joel Fernandes Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 11 +++++++++++ drivers/crypto/Makefile | 1 + 2 files changed, 12 insertions(+) diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 13857f5d28f7..8e38000990d7 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -262,6 +262,17 @@ config CRYPTO_DEV_OMAP_AES OMAP processors have AES module accelerator. Select this if you want to use the OMAP module for AES algorithms. +config CRYPTO_DEV_OMAP_DES + tristate "Support for OMAP DES3DES hw engine" + depends on ARCH_OMAP2PLUS + select CRYPTO_DES + select CRYPTO_BLKCIPHER2 + help + OMAP processors have DES/3DES module accelerator. Select this if you + want to use the OMAP module for DES and 3DES algorithms. Currently + the ECB and CBC modes of operation supported by the driver. Also + accesses made on unaligned boundaries are also supported. + config CRYPTO_DEV_PICOXCELL tristate "Support for picoXcell IPSEC and Layer2 crypto engines" depends on ARCH_PICOXCELL && HAVE_CLK diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 0bc6aa0a54d7..9a53fb8ceff9 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -13,6 +13,7 @@ obj-$(CONFIG_CRYPTO_DEV_NIAGARA2) += n2_crypto.o n2_crypto-y := n2_core.o n2_asm.o obj-$(CONFIG_CRYPTO_DEV_NX) += nx/ obj-$(CONFIG_CRYPTO_DEV_OMAP_AES) += omap-aes.o +obj-$(CONFIG_CRYPTO_DEV_OMAP_DES) += omap-des.o obj-$(CONFIG_CRYPTO_DEV_OMAP_SHAM) += omap-sham.o obj-$(CONFIG_CRYPTO_DEV_PADLOCK_AES) += padlock-aes.o obj-$(CONFIG_CRYPTO_DEV_PADLOCK_SHA) += padlock-sha.o From 645af2e43705d8c80c9606d5c5e83eda9a1a1a2e Mon Sep 17 00:00:00 2001 From: Stephen Warren Date: Tue, 18 Feb 2014 14:42:57 -0700 Subject: [PATCH 23/60] crypto: tegra - remove driver This driver has never been hooked up in any board file, and cannot be instantiated via device tree. I've been told that, at least on Tegra20, the HW is slower at crypto than the main CPU. I have no test-case for it. Hence, remove it. Cc: Varun Wadekar Signed-off-by: Stephen Warren Signed-off-by: Herbert Xu --- drivers/crypto/Kconfig | 11 - drivers/crypto/Makefile | 1 - drivers/crypto/tegra-aes.c | 1087 ------------------------------------ drivers/crypto/tegra-aes.h | 103 ---- 4 files changed, 1202 deletions(-) delete mode 100644 drivers/crypto/tegra-aes.c delete mode 100644 drivers/crypto/tegra-aes.h diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig index 8e38000990d7..03ccdb0ccf9e 100644 --- a/drivers/crypto/Kconfig +++ b/drivers/crypto/Kconfig @@ -311,17 +311,6 @@ config CRYPTO_DEV_S5P Select this to offload Samsung S5PV210 or S5PC110 from AES algorithms execution. -config CRYPTO_DEV_TEGRA_AES - tristate "Support for TEGRA AES hw engine" - depends on ARCH_TEGRA - select CRYPTO_AES - help - TEGRA processors have AES module accelerator. Select this if you - want to use the TEGRA module for AES algorithms. - - To compile this driver as a module, choose M here: the module - will be called tegra-aes. - config CRYPTO_DEV_NX bool "Support for IBM Power7+ in-Nest cryptographic acceleration" depends on PPC64 && IBMVIO diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile index 9a53fb8ceff9..482f090d16d0 100644 --- a/drivers/crypto/Makefile +++ b/drivers/crypto/Makefile @@ -22,5 +22,4 @@ obj-$(CONFIG_CRYPTO_DEV_PPC4XX) += amcc/ obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o -obj-$(CONFIG_CRYPTO_DEV_TEGRA_AES) += tegra-aes.o obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/ diff --git a/drivers/crypto/tegra-aes.c b/drivers/crypto/tegra-aes.c deleted file mode 100644 index 060eecc5dbc3..000000000000 --- a/drivers/crypto/tegra-aes.c +++ /dev/null @@ -1,1087 +0,0 @@ -/* - * drivers/crypto/tegra-aes.c - * - * Driver for NVIDIA Tegra AES hardware engine residing inside the - * Bit Stream Engine for Video (BSEV) hardware block. - * - * The programming sequence for this engine is with the help - * of commands which travel via a command queue residing between the - * CPU and the BSEV block. The BSEV engine has an internal RAM (VRAM) - * where the final input plaintext, keys and the IV have to be copied - * before starting the encrypt/decrypt operation. - * - * Copyright (c) 2010, NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -#include "tegra-aes.h" - -#define FLAGS_MODE_MASK 0x00FF -#define FLAGS_ENCRYPT BIT(0) -#define FLAGS_CBC BIT(1) -#define FLAGS_GIV BIT(2) -#define FLAGS_RNG BIT(3) -#define FLAGS_OFB BIT(4) -#define FLAGS_NEW_KEY BIT(5) -#define FLAGS_NEW_IV BIT(6) -#define FLAGS_INIT BIT(7) -#define FLAGS_FAST BIT(8) -#define FLAGS_BUSY 9 - -/* - * Defines AES engine Max process bytes size in one go, which takes 1 msec. - * AES engine spends about 176 cycles/16-bytes or 11 cycles/byte - * The duration CPU can use the BSE to 1 msec, then the number of available - * cycles of AVP/BSE is 216K. In this duration, AES can process 216/11 ~= 19KB - * Based on this AES_HW_DMA_BUFFER_SIZE_BYTES is configured to 16KB. - */ -#define AES_HW_DMA_BUFFER_SIZE_BYTES 0x4000 - -/* - * The key table length is 64 bytes - * (This includes first upto 32 bytes key + 16 bytes original initial vector - * and 16 bytes updated initial vector) - */ -#define AES_HW_KEY_TABLE_LENGTH_BYTES 64 - -/* - * The memory being used is divides as follows: - * 1. Key - 32 bytes - * 2. Original IV - 16 bytes - * 3. Updated IV - 16 bytes - * 4. Key schedule - 256 bytes - * - * 1+2+3 constitute the hw key table. - */ -#define AES_HW_IV_SIZE 16 -#define AES_HW_KEYSCHEDULE_LEN 256 -#define AES_IVKEY_SIZE (AES_HW_KEY_TABLE_LENGTH_BYTES + AES_HW_KEYSCHEDULE_LEN) - -/* Define commands required for AES operation */ -enum { - CMD_BLKSTARTENGINE = 0x0E, - CMD_DMASETUP = 0x10, - CMD_DMACOMPLETE = 0x11, - CMD_SETTABLE = 0x15, - CMD_MEMDMAVD = 0x22, -}; - -/* Define sub-commands */ -enum { - SUBCMD_VRAM_SEL = 0x1, - SUBCMD_CRYPTO_TABLE_SEL = 0x3, - SUBCMD_KEY_TABLE_SEL = 0x8, -}; - -/* memdma_vd command */ -#define MEMDMA_DIR_DTOVRAM 0 /* sdram -> vram */ -#define MEMDMA_DIR_VTODRAM 1 /* vram -> sdram */ -#define MEMDMA_DIR_SHIFT 25 -#define MEMDMA_NUM_WORDS_SHIFT 12 - -/* command queue bit shifts */ -enum { - CMDQ_KEYTABLEADDR_SHIFT = 0, - CMDQ_KEYTABLEID_SHIFT = 17, - CMDQ_VRAMSEL_SHIFT = 23, - CMDQ_TABLESEL_SHIFT = 24, - CMDQ_OPCODE_SHIFT = 26, -}; - -/* - * The secure key slot contains a unique secure key generated - * and loaded by the bootloader. This slot is marked as non-accessible - * to the kernel. - */ -#define SSK_SLOT_NUM 4 - -#define AES_NR_KEYSLOTS 8 -#define TEGRA_AES_QUEUE_LENGTH 50 -#define DEFAULT_RNG_BLK_SZ 16 - -/* The command queue depth */ -#define AES_HW_MAX_ICQ_LENGTH 5 - -struct tegra_aes_slot { - struct list_head node; - int slot_num; -}; - -static struct tegra_aes_slot ssk = { - .slot_num = SSK_SLOT_NUM, -}; - -struct tegra_aes_reqctx { - unsigned long mode; -}; - -struct tegra_aes_dev { - struct device *dev; - void __iomem *io_base; - dma_addr_t ivkey_phys_base; - void __iomem *ivkey_base; - struct clk *aes_clk; - struct tegra_aes_ctx *ctx; - int irq; - unsigned long flags; - struct completion op_complete; - u32 *buf_in; - dma_addr_t dma_buf_in; - u32 *buf_out; - dma_addr_t dma_buf_out; - u8 *iv; - u8 dt[DEFAULT_RNG_BLK_SZ]; - int ivlen; - u64 ctr; - spinlock_t lock; - struct crypto_queue queue; - struct tegra_aes_slot *slots; - struct ablkcipher_request *req; - size_t total; - struct scatterlist *in_sg; - size_t in_offset; - struct scatterlist *out_sg; - size_t out_offset; -}; - -static struct tegra_aes_dev *aes_dev; - -struct tegra_aes_ctx { - struct tegra_aes_dev *dd; - unsigned long flags; - struct tegra_aes_slot *slot; - u8 key[AES_MAX_KEY_SIZE]; - size_t keylen; -}; - -static struct tegra_aes_ctx rng_ctx = { - .flags = FLAGS_NEW_KEY, - .keylen = AES_KEYSIZE_128, -}; - -/* keep registered devices data here */ -static struct list_head dev_list; -static DEFINE_SPINLOCK(list_lock); -static DEFINE_MUTEX(aes_lock); - -static void aes_workqueue_handler(struct work_struct *work); -static DECLARE_WORK(aes_work, aes_workqueue_handler); -static struct workqueue_struct *aes_wq; - -static inline u32 aes_readl(struct tegra_aes_dev *dd, u32 offset) -{ - return readl(dd->io_base + offset); -} - -static inline void aes_writel(struct tegra_aes_dev *dd, u32 val, u32 offset) -{ - writel(val, dd->io_base + offset); -} - -static int aes_start_crypt(struct tegra_aes_dev *dd, u32 in_addr, u32 out_addr, - int nblocks, int mode, bool upd_iv) -{ - u32 cmdq[AES_HW_MAX_ICQ_LENGTH]; - int i, eng_busy, icq_empty, ret; - u32 value; - - /* reset all the interrupt bits */ - aes_writel(dd, 0xFFFFFFFF, TEGRA_AES_INTR_STATUS); - - /* enable error, dma xfer complete interrupts */ - aes_writel(dd, 0x33, TEGRA_AES_INT_ENB); - - cmdq[0] = CMD_DMASETUP << CMDQ_OPCODE_SHIFT; - cmdq[1] = in_addr; - cmdq[2] = CMD_BLKSTARTENGINE << CMDQ_OPCODE_SHIFT | (nblocks-1); - cmdq[3] = CMD_DMACOMPLETE << CMDQ_OPCODE_SHIFT; - - value = aes_readl(dd, TEGRA_AES_CMDQUE_CONTROL); - /* access SDRAM through AHB */ - value &= ~TEGRA_AES_CMDQ_CTRL_SRC_STM_SEL_FIELD; - value &= ~TEGRA_AES_CMDQ_CTRL_DST_STM_SEL_FIELD; - value |= TEGRA_AES_CMDQ_CTRL_SRC_STM_SEL_FIELD | - TEGRA_AES_CMDQ_CTRL_DST_STM_SEL_FIELD | - TEGRA_AES_CMDQ_CTRL_ICMDQEN_FIELD; - aes_writel(dd, value, TEGRA_AES_CMDQUE_CONTROL); - dev_dbg(dd->dev, "cmd_q_ctrl=0x%x", value); - - value = (0x1 << TEGRA_AES_SECURE_INPUT_ALG_SEL_SHIFT) | - ((dd->ctx->keylen * 8) << - TEGRA_AES_SECURE_INPUT_KEY_LEN_SHIFT) | - ((u32)upd_iv << TEGRA_AES_SECURE_IV_SELECT_SHIFT); - - if (mode & FLAGS_CBC) { - value |= ((((mode & FLAGS_ENCRYPT) ? 2 : 3) - << TEGRA_AES_SECURE_XOR_POS_SHIFT) | - (((mode & FLAGS_ENCRYPT) ? 2 : 3) - << TEGRA_AES_SECURE_VCTRAM_SEL_SHIFT) | - ((mode & FLAGS_ENCRYPT) ? 1 : 0) - << TEGRA_AES_SECURE_CORE_SEL_SHIFT); - } else if (mode & FLAGS_OFB) { - value |= ((TEGRA_AES_SECURE_XOR_POS_FIELD) | - (2 << TEGRA_AES_SECURE_INPUT_SEL_SHIFT) | - (TEGRA_AES_SECURE_CORE_SEL_FIELD)); - } else if (mode & FLAGS_RNG) { - value |= (((mode & FLAGS_ENCRYPT) ? 1 : 0) - << TEGRA_AES_SECURE_CORE_SEL_SHIFT | - TEGRA_AES_SECURE_RNG_ENB_FIELD); - } else { - value |= (((mode & FLAGS_ENCRYPT) ? 1 : 0) - << TEGRA_AES_SECURE_CORE_SEL_SHIFT); - } - - dev_dbg(dd->dev, "secure_in_sel=0x%x", value); - aes_writel(dd, value, TEGRA_AES_SECURE_INPUT_SELECT); - - aes_writel(dd, out_addr, TEGRA_AES_SECURE_DEST_ADDR); - reinit_completion(&dd->op_complete); - - for (i = 0; i < AES_HW_MAX_ICQ_LENGTH - 1; i++) { - do { - value = aes_readl(dd, TEGRA_AES_INTR_STATUS); - eng_busy = value & TEGRA_AES_ENGINE_BUSY_FIELD; - icq_empty = value & TEGRA_AES_ICQ_EMPTY_FIELD; - } while (eng_busy && !icq_empty); - aes_writel(dd, cmdq[i], TEGRA_AES_ICMDQUE_WR); - } - - ret = wait_for_completion_timeout(&dd->op_complete, - msecs_to_jiffies(150)); - if (ret == 0) { - dev_err(dd->dev, "timed out (0x%x)\n", - aes_readl(dd, TEGRA_AES_INTR_STATUS)); - return -ETIMEDOUT; - } - - aes_writel(dd, cmdq[AES_HW_MAX_ICQ_LENGTH - 1], TEGRA_AES_ICMDQUE_WR); - return 0; -} - -static void aes_release_key_slot(struct tegra_aes_slot *slot) -{ - if (slot->slot_num == SSK_SLOT_NUM) - return; - - spin_lock(&list_lock); - list_add_tail(&slot->node, &dev_list); - slot = NULL; - spin_unlock(&list_lock); -} - -static struct tegra_aes_slot *aes_find_key_slot(void) -{ - struct tegra_aes_slot *slot = NULL; - struct list_head *new_head; - int empty; - - spin_lock(&list_lock); - empty = list_empty(&dev_list); - if (!empty) { - slot = list_entry(&dev_list, struct tegra_aes_slot, node); - new_head = dev_list.next; - list_del(&dev_list); - dev_list.next = new_head->next; - dev_list.prev = NULL; - } - spin_unlock(&list_lock); - - return slot; -} - -static int aes_set_key(struct tegra_aes_dev *dd) -{ - u32 value, cmdq[2]; - struct tegra_aes_ctx *ctx = dd->ctx; - int eng_busy, icq_empty, dma_busy; - bool use_ssk = false; - - /* use ssk? */ - if (!dd->ctx->slot) { - dev_dbg(dd->dev, "using ssk"); - dd->ctx->slot = &ssk; - use_ssk = true; - } - - /* enable key schedule generation in hardware */ - value = aes_readl(dd, TEGRA_AES_SECURE_CONFIG_EXT); - value &= ~TEGRA_AES_SECURE_KEY_SCH_DIS_FIELD; - aes_writel(dd, value, TEGRA_AES_SECURE_CONFIG_EXT); - - /* select the key slot */ - value = aes_readl(dd, TEGRA_AES_SECURE_CONFIG); - value &= ~TEGRA_AES_SECURE_KEY_INDEX_FIELD; - value |= (ctx->slot->slot_num << TEGRA_AES_SECURE_KEY_INDEX_SHIFT); - aes_writel(dd, value, TEGRA_AES_SECURE_CONFIG); - - if (use_ssk) - return 0; - - /* copy the key table from sdram to vram */ - cmdq[0] = CMD_MEMDMAVD << CMDQ_OPCODE_SHIFT | - MEMDMA_DIR_DTOVRAM << MEMDMA_DIR_SHIFT | - AES_HW_KEY_TABLE_LENGTH_BYTES / sizeof(u32) << - MEMDMA_NUM_WORDS_SHIFT; - cmdq[1] = (u32)dd->ivkey_phys_base; - - aes_writel(dd, cmdq[0], TEGRA_AES_ICMDQUE_WR); - aes_writel(dd, cmdq[1], TEGRA_AES_ICMDQUE_WR); - - do { - value = aes_readl(dd, TEGRA_AES_INTR_STATUS); - eng_busy = value & TEGRA_AES_ENGINE_BUSY_FIELD; - icq_empty = value & TEGRA_AES_ICQ_EMPTY_FIELD; - dma_busy = value & TEGRA_AES_DMA_BUSY_FIELD; - } while (eng_busy && !icq_empty && dma_busy); - - /* settable command to get key into internal registers */ - value = CMD_SETTABLE << CMDQ_OPCODE_SHIFT | - SUBCMD_CRYPTO_TABLE_SEL << CMDQ_TABLESEL_SHIFT | - SUBCMD_VRAM_SEL << CMDQ_VRAMSEL_SHIFT | - (SUBCMD_KEY_TABLE_SEL | ctx->slot->slot_num) << - CMDQ_KEYTABLEID_SHIFT; - aes_writel(dd, value, TEGRA_AES_ICMDQUE_WR); - - do { - value = aes_readl(dd, TEGRA_AES_INTR_STATUS); - eng_busy = value & TEGRA_AES_ENGINE_BUSY_FIELD; - icq_empty = value & TEGRA_AES_ICQ_EMPTY_FIELD; - } while (eng_busy && !icq_empty); - - return 0; -} - -static int tegra_aes_handle_req(struct tegra_aes_dev *dd) -{ - struct crypto_async_request *async_req, *backlog; - struct crypto_ablkcipher *tfm; - struct tegra_aes_ctx *ctx; - struct tegra_aes_reqctx *rctx; - struct ablkcipher_request *req; - unsigned long flags; - int dma_max = AES_HW_DMA_BUFFER_SIZE_BYTES; - int ret = 0, nblocks, total; - int count = 0; - dma_addr_t addr_in, addr_out; - struct scatterlist *in_sg, *out_sg; - - if (!dd) - return -EINVAL; - - spin_lock_irqsave(&dd->lock, flags); - backlog = crypto_get_backlog(&dd->queue); - async_req = crypto_dequeue_request(&dd->queue); - if (!async_req) - clear_bit(FLAGS_BUSY, &dd->flags); - spin_unlock_irqrestore(&dd->lock, flags); - - if (!async_req) - return -ENODATA; - - if (backlog) - backlog->complete(backlog, -EINPROGRESS); - - req = ablkcipher_request_cast(async_req); - - dev_dbg(dd->dev, "%s: get new req\n", __func__); - - if (!req->src || !req->dst) - return -EINVAL; - - /* take mutex to access the aes hw */ - mutex_lock(&aes_lock); - - /* assign new request to device */ - dd->req = req; - dd->total = req->nbytes; - dd->in_offset = 0; - dd->in_sg = req->src; - dd->out_offset = 0; - dd->out_sg = req->dst; - - in_sg = dd->in_sg; - out_sg = dd->out_sg; - - total = dd->total; - - tfm = crypto_ablkcipher_reqtfm(req); - rctx = ablkcipher_request_ctx(req); - ctx = crypto_ablkcipher_ctx(tfm); - rctx->mode &= FLAGS_MODE_MASK; - dd->flags = (dd->flags & ~FLAGS_MODE_MASK) | rctx->mode; - - dd->iv = (u8 *)req->info; - dd->ivlen = crypto_ablkcipher_ivsize(tfm); - - /* assign new context to device */ - ctx->dd = dd; - dd->ctx = ctx; - - if (ctx->flags & FLAGS_NEW_KEY) { - /* copy the key */ - memcpy(dd->ivkey_base, ctx->key, ctx->keylen); - memset(dd->ivkey_base + ctx->keylen, 0, AES_HW_KEY_TABLE_LENGTH_BYTES - ctx->keylen); - aes_set_key(dd); - ctx->flags &= ~FLAGS_NEW_KEY; - } - - if (((dd->flags & FLAGS_CBC) || (dd->flags & FLAGS_OFB)) && dd->iv) { - /* set iv to the aes hw slot - * Hw generates updated iv only after iv is set in slot. - * So key and iv is passed asynchronously. - */ - memcpy(dd->buf_in, dd->iv, dd->ivlen); - - ret = aes_start_crypt(dd, (u32)dd->dma_buf_in, - dd->dma_buf_out, 1, FLAGS_CBC, false); - if (ret < 0) { - dev_err(dd->dev, "aes_start_crypt fail(%d)\n", ret); - goto out; - } - } - - while (total) { - dev_dbg(dd->dev, "remain: %d\n", total); - ret = dma_map_sg(dd->dev, in_sg, 1, DMA_TO_DEVICE); - if (!ret) { - dev_err(dd->dev, "dma_map_sg() error\n"); - goto out; - } - - ret = dma_map_sg(dd->dev, out_sg, 1, DMA_FROM_DEVICE); - if (!ret) { - dev_err(dd->dev, "dma_map_sg() error\n"); - dma_unmap_sg(dd->dev, dd->in_sg, - 1, DMA_TO_DEVICE); - goto out; - } - - addr_in = sg_dma_address(in_sg); - addr_out = sg_dma_address(out_sg); - dd->flags |= FLAGS_FAST; - count = min_t(int, sg_dma_len(in_sg), dma_max); - WARN_ON(sg_dma_len(in_sg) != sg_dma_len(out_sg)); - nblocks = DIV_ROUND_UP(count, AES_BLOCK_SIZE); - - ret = aes_start_crypt(dd, addr_in, addr_out, nblocks, - dd->flags, true); - - dma_unmap_sg(dd->dev, out_sg, 1, DMA_FROM_DEVICE); - dma_unmap_sg(dd->dev, in_sg, 1, DMA_TO_DEVICE); - - if (ret < 0) { - dev_err(dd->dev, "aes_start_crypt fail(%d)\n", ret); - goto out; - } - dd->flags &= ~FLAGS_FAST; - - dev_dbg(dd->dev, "out: copied %d\n", count); - total -= count; - in_sg = sg_next(in_sg); - out_sg = sg_next(out_sg); - WARN_ON(((total != 0) && (!in_sg || !out_sg))); - } - -out: - mutex_unlock(&aes_lock); - - dd->total = total; - - if (dd->req->base.complete) - dd->req->base.complete(&dd->req->base, ret); - - dev_dbg(dd->dev, "%s: exit\n", __func__); - return ret; -} - -static int tegra_aes_setkey(struct crypto_ablkcipher *tfm, const u8 *key, - unsigned int keylen) -{ - struct tegra_aes_ctx *ctx = crypto_ablkcipher_ctx(tfm); - struct tegra_aes_dev *dd = aes_dev; - struct tegra_aes_slot *key_slot; - - if ((keylen != AES_KEYSIZE_128) && (keylen != AES_KEYSIZE_192) && - (keylen != AES_KEYSIZE_256)) { - dev_err(dd->dev, "unsupported key size\n"); - crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); - return -EINVAL; - } - - dev_dbg(dd->dev, "keylen: %d\n", keylen); - - ctx->dd = dd; - - if (key) { - if (!ctx->slot) { - key_slot = aes_find_key_slot(); - if (!key_slot) { - dev_err(dd->dev, "no empty slot\n"); - return -ENOMEM; - } - - ctx->slot = key_slot; - } - - memcpy(ctx->key, key, keylen); - ctx->keylen = keylen; - } - - ctx->flags |= FLAGS_NEW_KEY; - dev_dbg(dd->dev, "done\n"); - return 0; -} - -static void aes_workqueue_handler(struct work_struct *work) -{ - struct tegra_aes_dev *dd = aes_dev; - int ret; - - ret = clk_prepare_enable(dd->aes_clk); - if (ret) - BUG_ON("clock enable failed"); - - /* empty the crypto queue and then return */ - do { - ret = tegra_aes_handle_req(dd); - } while (!ret); - - clk_disable_unprepare(dd->aes_clk); -} - -static irqreturn_t aes_irq(int irq, void *dev_id) -{ - struct tegra_aes_dev *dd = (struct tegra_aes_dev *)dev_id; - u32 value = aes_readl(dd, TEGRA_AES_INTR_STATUS); - int busy = test_bit(FLAGS_BUSY, &dd->flags); - - if (!busy) { - dev_dbg(dd->dev, "spurious interrupt\n"); - return IRQ_NONE; - } - - dev_dbg(dd->dev, "irq_stat: 0x%x\n", value); - if (value & TEGRA_AES_INT_ERROR_MASK) - aes_writel(dd, TEGRA_AES_INT_ERROR_MASK, TEGRA_AES_INTR_STATUS); - - if (!(value & TEGRA_AES_ENGINE_BUSY_FIELD)) - complete(&dd->op_complete); - else - return IRQ_NONE; - - return IRQ_HANDLED; -} - -static int tegra_aes_crypt(struct ablkcipher_request *req, unsigned long mode) -{ - struct tegra_aes_reqctx *rctx = ablkcipher_request_ctx(req); - struct tegra_aes_dev *dd = aes_dev; - unsigned long flags; - int err = 0; - int busy; - - dev_dbg(dd->dev, "nbytes: %d, enc: %d, cbc: %d, ofb: %d\n", - req->nbytes, !!(mode & FLAGS_ENCRYPT), - !!(mode & FLAGS_CBC), !!(mode & FLAGS_OFB)); - - rctx->mode = mode; - - spin_lock_irqsave(&dd->lock, flags); - err = ablkcipher_enqueue_request(&dd->queue, req); - busy = test_and_set_bit(FLAGS_BUSY, &dd->flags); - spin_unlock_irqrestore(&dd->lock, flags); - - if (!busy) - queue_work(aes_wq, &aes_work); - - return err; -} - -static int tegra_aes_ecb_encrypt(struct ablkcipher_request *req) -{ - return tegra_aes_crypt(req, FLAGS_ENCRYPT); -} - -static int tegra_aes_ecb_decrypt(struct ablkcipher_request *req) -{ - return tegra_aes_crypt(req, 0); -} - -static int tegra_aes_cbc_encrypt(struct ablkcipher_request *req) -{ - return tegra_aes_crypt(req, FLAGS_ENCRYPT | FLAGS_CBC); -} - -static int tegra_aes_cbc_decrypt(struct ablkcipher_request *req) -{ - return tegra_aes_crypt(req, FLAGS_CBC); -} - -static int tegra_aes_ofb_encrypt(struct ablkcipher_request *req) -{ - return tegra_aes_crypt(req, FLAGS_ENCRYPT | FLAGS_OFB); -} - -static int tegra_aes_ofb_decrypt(struct ablkcipher_request *req) -{ - return tegra_aes_crypt(req, FLAGS_OFB); -} - -static int tegra_aes_get_random(struct crypto_rng *tfm, u8 *rdata, - unsigned int dlen) -{ - struct tegra_aes_dev *dd = aes_dev; - struct tegra_aes_ctx *ctx = &rng_ctx; - int ret, i; - u8 *dest = rdata, *dt = dd->dt; - - /* take mutex to access the aes hw */ - mutex_lock(&aes_lock); - - ret = clk_prepare_enable(dd->aes_clk); - if (ret) { - mutex_unlock(&aes_lock); - return ret; - } - - ctx->dd = dd; - dd->ctx = ctx; - dd->flags = FLAGS_ENCRYPT | FLAGS_RNG; - - memcpy(dd->buf_in, dt, DEFAULT_RNG_BLK_SZ); - - ret = aes_start_crypt(dd, (u32)dd->dma_buf_in, - (u32)dd->dma_buf_out, 1, dd->flags, true); - if (ret < 0) { - dev_err(dd->dev, "aes_start_crypt fail(%d)\n", ret); - dlen = ret; - goto out; - } - memcpy(dest, dd->buf_out, dlen); - - /* update the DT */ - for (i = DEFAULT_RNG_BLK_SZ - 1; i >= 0; i--) { - dt[i] += 1; - if (dt[i] != 0) - break; - } - -out: - clk_disable_unprepare(dd->aes_clk); - mutex_unlock(&aes_lock); - - dev_dbg(dd->dev, "%s: done\n", __func__); - return dlen; -} - -static int tegra_aes_rng_reset(struct crypto_rng *tfm, u8 *seed, - unsigned int slen) -{ - struct tegra_aes_dev *dd = aes_dev; - struct tegra_aes_ctx *ctx = &rng_ctx; - struct tegra_aes_slot *key_slot; - int ret = 0; - u8 tmp[16]; /* 16 bytes = 128 bits of entropy */ - u8 *dt; - - if (!ctx || !dd) { - pr_err("ctx=0x%x, dd=0x%x\n", - (unsigned int)ctx, (unsigned int)dd); - return -EINVAL; - } - - if (slen < (DEFAULT_RNG_BLK_SZ + AES_KEYSIZE_128)) { - dev_err(dd->dev, "seed size invalid"); - return -ENOMEM; - } - - /* take mutex to access the aes hw */ - mutex_lock(&aes_lock); - - if (!ctx->slot) { - key_slot = aes_find_key_slot(); - if (!key_slot) { - dev_err(dd->dev, "no empty slot\n"); - mutex_unlock(&aes_lock); - return -ENOMEM; - } - ctx->slot = key_slot; - } - - ctx->dd = dd; - dd->ctx = ctx; - dd->ctr = 0; - - ctx->keylen = AES_KEYSIZE_128; - ctx->flags |= FLAGS_NEW_KEY; - - /* copy the key to the key slot */ - memcpy(dd->ivkey_base, seed + DEFAULT_RNG_BLK_SZ, AES_KEYSIZE_128); - memset(dd->ivkey_base + AES_KEYSIZE_128, 0, AES_HW_KEY_TABLE_LENGTH_BYTES - AES_KEYSIZE_128); - - dd->iv = seed; - dd->ivlen = slen; - - dd->flags = FLAGS_ENCRYPT | FLAGS_RNG; - - ret = clk_prepare_enable(dd->aes_clk); - if (ret) { - mutex_unlock(&aes_lock); - return ret; - } - - aes_set_key(dd); - - /* set seed to the aes hw slot */ - memcpy(dd->buf_in, dd->iv, DEFAULT_RNG_BLK_SZ); - ret = aes_start_crypt(dd, (u32)dd->dma_buf_in, - dd->dma_buf_out, 1, FLAGS_CBC, false); - if (ret < 0) { - dev_err(dd->dev, "aes_start_crypt fail(%d)\n", ret); - goto out; - } - - if (dd->ivlen >= (2 * DEFAULT_RNG_BLK_SZ + AES_KEYSIZE_128)) { - dt = dd->iv + DEFAULT_RNG_BLK_SZ + AES_KEYSIZE_128; - } else { - get_random_bytes(tmp, sizeof(tmp)); - dt = tmp; - } - memcpy(dd->dt, dt, DEFAULT_RNG_BLK_SZ); - -out: - clk_disable_unprepare(dd->aes_clk); - mutex_unlock(&aes_lock); - - dev_dbg(dd->dev, "%s: done\n", __func__); - return ret; -} - -static int tegra_aes_cra_init(struct crypto_tfm *tfm) -{ - tfm->crt_ablkcipher.reqsize = sizeof(struct tegra_aes_reqctx); - - return 0; -} - -static void tegra_aes_cra_exit(struct crypto_tfm *tfm) -{ - struct tegra_aes_ctx *ctx = - crypto_ablkcipher_ctx((struct crypto_ablkcipher *)tfm); - - if (ctx && ctx->slot) - aes_release_key_slot(ctx->slot); -} - -static struct crypto_alg algs[] = { - { - .cra_name = "ecb(aes)", - .cra_driver_name = "ecb-aes-tegra", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_alignmask = 3, - .cra_type = &crypto_ablkcipher_type, - .cra_u.ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .setkey = tegra_aes_setkey, - .encrypt = tegra_aes_ecb_encrypt, - .decrypt = tegra_aes_ecb_decrypt, - }, - }, { - .cra_name = "cbc(aes)", - .cra_driver_name = "cbc-aes-tegra", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_alignmask = 3, - .cra_type = &crypto_ablkcipher_type, - .cra_u.ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_MIN_KEY_SIZE, - .setkey = tegra_aes_setkey, - .encrypt = tegra_aes_cbc_encrypt, - .decrypt = tegra_aes_cbc_decrypt, - } - }, { - .cra_name = "ofb(aes)", - .cra_driver_name = "ofb-aes-tegra", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC, - .cra_blocksize = AES_BLOCK_SIZE, - .cra_alignmask = 3, - .cra_type = &crypto_ablkcipher_type, - .cra_u.ablkcipher = { - .min_keysize = AES_MIN_KEY_SIZE, - .max_keysize = AES_MAX_KEY_SIZE, - .ivsize = AES_MIN_KEY_SIZE, - .setkey = tegra_aes_setkey, - .encrypt = tegra_aes_ofb_encrypt, - .decrypt = tegra_aes_ofb_decrypt, - } - }, { - .cra_name = "ansi_cprng", - .cra_driver_name = "rng-aes-tegra", - .cra_flags = CRYPTO_ALG_TYPE_RNG, - .cra_ctxsize = sizeof(struct tegra_aes_ctx), - .cra_type = &crypto_rng_type, - .cra_u.rng = { - .rng_make_random = tegra_aes_get_random, - .rng_reset = tegra_aes_rng_reset, - .seedsize = AES_KEYSIZE_128 + (2 * DEFAULT_RNG_BLK_SZ), - } - } -}; - -static int tegra_aes_probe(struct platform_device *pdev) -{ - struct device *dev = &pdev->dev; - struct tegra_aes_dev *dd; - struct resource *res; - int err = -ENOMEM, i = 0, j; - - dd = devm_kzalloc(dev, sizeof(struct tegra_aes_dev), GFP_KERNEL); - if (dd == NULL) { - dev_err(dev, "unable to alloc data struct.\n"); - return err; - } - - dd->dev = dev; - platform_set_drvdata(pdev, dd); - - dd->slots = devm_kzalloc(dev, sizeof(struct tegra_aes_slot) * - AES_NR_KEYSLOTS, GFP_KERNEL); - if (dd->slots == NULL) { - dev_err(dev, "unable to alloc slot struct.\n"); - goto out; - } - - spin_lock_init(&dd->lock); - crypto_init_queue(&dd->queue, TEGRA_AES_QUEUE_LENGTH); - - /* Get the module base address */ - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - dev_err(dev, "invalid resource type: base\n"); - err = -ENODEV; - goto out; - } - - if (!devm_request_mem_region(&pdev->dev, res->start, - resource_size(res), - dev_name(&pdev->dev))) { - dev_err(&pdev->dev, "Couldn't request MEM resource\n"); - return -ENODEV; - } - - dd->io_base = devm_ioremap(dev, res->start, resource_size(res)); - if (!dd->io_base) { - dev_err(dev, "can't ioremap register space\n"); - err = -ENOMEM; - goto out; - } - - /* Initialize the vde clock */ - dd->aes_clk = devm_clk_get(dev, "vde"); - if (IS_ERR(dd->aes_clk)) { - dev_err(dev, "iclock intialization failed.\n"); - err = -ENODEV; - goto out; - } - - err = clk_set_rate(dd->aes_clk, ULONG_MAX); - if (err) { - dev_err(dd->dev, "iclk set_rate fail(%d)\n", err); - goto out; - } - - /* - * the foll contiguous memory is allocated as follows - - * - hardware key table - * - key schedule - */ - dd->ivkey_base = dma_alloc_coherent(dev, AES_HW_KEY_TABLE_LENGTH_BYTES, - &dd->ivkey_phys_base, - GFP_KERNEL); - if (!dd->ivkey_base) { - dev_err(dev, "can not allocate iv/key buffer\n"); - err = -ENOMEM; - goto out; - } - - dd->buf_in = dma_alloc_coherent(dev, AES_HW_DMA_BUFFER_SIZE_BYTES, - &dd->dma_buf_in, GFP_KERNEL); - if (!dd->buf_in) { - dev_err(dev, "can not allocate dma-in buffer\n"); - err = -ENOMEM; - goto out; - } - - dd->buf_out = dma_alloc_coherent(dev, AES_HW_DMA_BUFFER_SIZE_BYTES, - &dd->dma_buf_out, GFP_KERNEL); - if (!dd->buf_out) { - dev_err(dev, "can not allocate dma-out buffer\n"); - err = -ENOMEM; - goto out; - } - - init_completion(&dd->op_complete); - aes_wq = alloc_workqueue("tegra_aes_wq", WQ_HIGHPRI | WQ_UNBOUND, 1); - if (!aes_wq) { - dev_err(dev, "alloc_workqueue failed\n"); - err = -ENOMEM; - goto out; - } - - /* get the irq */ - res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (!res) { - dev_err(dev, "invalid resource type: base\n"); - err = -ENODEV; - goto out; - } - dd->irq = res->start; - - err = devm_request_irq(dev, dd->irq, aes_irq, IRQF_TRIGGER_HIGH | - IRQF_SHARED, "tegra-aes", dd); - if (err) { - dev_err(dev, "request_irq failed\n"); - goto out; - } - - mutex_init(&aes_lock); - INIT_LIST_HEAD(&dev_list); - - spin_lock_init(&list_lock); - spin_lock(&list_lock); - for (i = 0; i < AES_NR_KEYSLOTS; i++) { - if (i == SSK_SLOT_NUM) - continue; - dd->slots[i].slot_num = i; - INIT_LIST_HEAD(&dd->slots[i].node); - list_add_tail(&dd->slots[i].node, &dev_list); - } - spin_unlock(&list_lock); - - aes_dev = dd; - for (i = 0; i < ARRAY_SIZE(algs); i++) { - algs[i].cra_priority = 300; - algs[i].cra_ctxsize = sizeof(struct tegra_aes_ctx); - algs[i].cra_module = THIS_MODULE; - algs[i].cra_init = tegra_aes_cra_init; - algs[i].cra_exit = tegra_aes_cra_exit; - - err = crypto_register_alg(&algs[i]); - if (err) - goto out; - } - - dev_info(dev, "registered"); - return 0; - -out: - for (j = 0; j < i; j++) - crypto_unregister_alg(&algs[j]); - if (dd->ivkey_base) - dma_free_coherent(dev, AES_HW_KEY_TABLE_LENGTH_BYTES, - dd->ivkey_base, dd->ivkey_phys_base); - if (dd->buf_in) - dma_free_coherent(dev, AES_HW_DMA_BUFFER_SIZE_BYTES, - dd->buf_in, dd->dma_buf_in); - if (dd->buf_out) - dma_free_coherent(dev, AES_HW_DMA_BUFFER_SIZE_BYTES, - dd->buf_out, dd->dma_buf_out); - if (aes_wq) - destroy_workqueue(aes_wq); - spin_lock(&list_lock); - list_del(&dev_list); - spin_unlock(&list_lock); - - aes_dev = NULL; - - dev_err(dev, "%s: initialization failed.\n", __func__); - return err; -} - -static int tegra_aes_remove(struct platform_device *pdev) -{ - struct device *dev = &pdev->dev; - struct tegra_aes_dev *dd = platform_get_drvdata(pdev); - int i; - - for (i = 0; i < ARRAY_SIZE(algs); i++) - crypto_unregister_alg(&algs[i]); - - cancel_work_sync(&aes_work); - destroy_workqueue(aes_wq); - spin_lock(&list_lock); - list_del(&dev_list); - spin_unlock(&list_lock); - - dma_free_coherent(dev, AES_HW_KEY_TABLE_LENGTH_BYTES, - dd->ivkey_base, dd->ivkey_phys_base); - dma_free_coherent(dev, AES_HW_DMA_BUFFER_SIZE_BYTES, - dd->buf_in, dd->dma_buf_in); - dma_free_coherent(dev, AES_HW_DMA_BUFFER_SIZE_BYTES, - dd->buf_out, dd->dma_buf_out); - aes_dev = NULL; - - return 0; -} - -static struct of_device_id tegra_aes_of_match[] = { - { .compatible = "nvidia,tegra20-aes", }, - { .compatible = "nvidia,tegra30-aes", }, - { }, -}; - -static struct platform_driver tegra_aes_driver = { - .probe = tegra_aes_probe, - .remove = tegra_aes_remove, - .driver = { - .name = "tegra-aes", - .owner = THIS_MODULE, - .of_match_table = tegra_aes_of_match, - }, -}; - -module_platform_driver(tegra_aes_driver); - -MODULE_DESCRIPTION("Tegra AES/OFB/CPRNG hw acceleration support."); -MODULE_AUTHOR("NVIDIA Corporation"); -MODULE_LICENSE("GPL v2"); diff --git a/drivers/crypto/tegra-aes.h b/drivers/crypto/tegra-aes.h deleted file mode 100644 index 6006333a8934..000000000000 --- a/drivers/crypto/tegra-aes.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2010, NVIDIA Corporation. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - * You should have received a copy of the GNU General Public License along - * with this program; if not, write to the Free Software Foundation, Inc., - * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#ifndef __CRYPTODEV_TEGRA_AES_H -#define __CRYPTODEV_TEGRA_AES_H - -#define TEGRA_AES_ICMDQUE_WR 0x1000 -#define TEGRA_AES_CMDQUE_CONTROL 0x1008 -#define TEGRA_AES_INTR_STATUS 0x1018 -#define TEGRA_AES_INT_ENB 0x1040 -#define TEGRA_AES_CONFIG 0x1044 -#define TEGRA_AES_IRAM_ACCESS_CFG 0x10A0 -#define TEGRA_AES_SECURE_DEST_ADDR 0x1100 -#define TEGRA_AES_SECURE_INPUT_SELECT 0x1104 -#define TEGRA_AES_SECURE_CONFIG 0x1108 -#define TEGRA_AES_SECURE_CONFIG_EXT 0x110C -#define TEGRA_AES_SECURE_SECURITY 0x1110 -#define TEGRA_AES_SECURE_HASH_RESULT0 0x1120 -#define TEGRA_AES_SECURE_HASH_RESULT1 0x1124 -#define TEGRA_AES_SECURE_HASH_RESULT2 0x1128 -#define TEGRA_AES_SECURE_HASH_RESULT3 0x112C -#define TEGRA_AES_SECURE_SEC_SEL0 0x1140 -#define TEGRA_AES_SECURE_SEC_SEL1 0x1144 -#define TEGRA_AES_SECURE_SEC_SEL2 0x1148 -#define TEGRA_AES_SECURE_SEC_SEL3 0x114C -#define TEGRA_AES_SECURE_SEC_SEL4 0x1150 -#define TEGRA_AES_SECURE_SEC_SEL5 0x1154 -#define TEGRA_AES_SECURE_SEC_SEL6 0x1158 -#define TEGRA_AES_SECURE_SEC_SEL7 0x115C - -/* interrupt status reg masks and shifts */ -#define TEGRA_AES_ENGINE_BUSY_FIELD BIT(0) -#define TEGRA_AES_ICQ_EMPTY_FIELD BIT(3) -#define TEGRA_AES_DMA_BUSY_FIELD BIT(23) - -/* secure select reg masks and shifts */ -#define TEGRA_AES_SECURE_SEL0_KEYREAD_ENB0_FIELD BIT(0) - -/* secure config ext masks and shifts */ -#define TEGRA_AES_SECURE_KEY_SCH_DIS_FIELD BIT(15) - -/* secure config masks and shifts */ -#define TEGRA_AES_SECURE_KEY_INDEX_SHIFT 20 -#define TEGRA_AES_SECURE_KEY_INDEX_FIELD (0x1F << TEGRA_AES_SECURE_KEY_INDEX_SHIFT) -#define TEGRA_AES_SECURE_BLOCK_CNT_SHIFT 0 -#define TEGRA_AES_SECURE_BLOCK_CNT_FIELD (0xFFFFF << TEGRA_AES_SECURE_BLOCK_CNT_SHIFT) - -/* stream interface select masks and shifts */ -#define TEGRA_AES_CMDQ_CTRL_UCMDQEN_FIELD BIT(0) -#define TEGRA_AES_CMDQ_CTRL_ICMDQEN_FIELD BIT(1) -#define TEGRA_AES_CMDQ_CTRL_SRC_STM_SEL_FIELD BIT(4) -#define TEGRA_AES_CMDQ_CTRL_DST_STM_SEL_FIELD BIT(5) - -/* config register masks and shifts */ -#define TEGRA_AES_CONFIG_ENDIAN_ENB_FIELD BIT(10) -#define TEGRA_AES_CONFIG_MODE_SEL_SHIFT 0 -#define TEGRA_AES_CONFIG_MODE_SEL_FIELD (0x1F << TEGRA_AES_CONFIG_MODE_SEL_SHIFT) - -/* extended config */ -#define TEGRA_AES_SECURE_OFFSET_CNT_SHIFT 24 -#define TEGRA_AES_SECURE_OFFSET_CNT_FIELD (0xFF << TEGRA_AES_SECURE_OFFSET_CNT_SHIFT) -#define TEGRA_AES_SECURE_KEYSCHED_GEN_FIELD BIT(15) - -/* init vector select */ -#define TEGRA_AES_SECURE_IV_SELECT_SHIFT 10 -#define TEGRA_AES_SECURE_IV_SELECT_FIELD BIT(10) - -/* secure engine input */ -#define TEGRA_AES_SECURE_INPUT_ALG_SEL_SHIFT 28 -#define TEGRA_AES_SECURE_INPUT_ALG_SEL_FIELD (0xF << TEGRA_AES_SECURE_INPUT_ALG_SEL_SHIFT) -#define TEGRA_AES_SECURE_INPUT_KEY_LEN_SHIFT 16 -#define TEGRA_AES_SECURE_INPUT_KEY_LEN_FIELD (0xFFF << TEGRA_AES_SECURE_INPUT_KEY_LEN_SHIFT) -#define TEGRA_AES_SECURE_RNG_ENB_FIELD BIT(11) -#define TEGRA_AES_SECURE_CORE_SEL_SHIFT 9 -#define TEGRA_AES_SECURE_CORE_SEL_FIELD BIT(9) -#define TEGRA_AES_SECURE_VCTRAM_SEL_SHIFT 7 -#define TEGRA_AES_SECURE_VCTRAM_SEL_FIELD (0x3 << TEGRA_AES_SECURE_VCTRAM_SEL_SHIFT) -#define TEGRA_AES_SECURE_INPUT_SEL_SHIFT 5 -#define TEGRA_AES_SECURE_INPUT_SEL_FIELD (0x3 << TEGRA_AES_SECURE_INPUT_SEL_SHIFT) -#define TEGRA_AES_SECURE_XOR_POS_SHIFT 3 -#define TEGRA_AES_SECURE_XOR_POS_FIELD (0x3 << TEGRA_AES_SECURE_XOR_POS_SHIFT) -#define TEGRA_AES_SECURE_HASH_ENB_FIELD BIT(2) -#define TEGRA_AES_SECURE_ON_THE_FLY_FIELD BIT(0) - -/* interrupt error mask */ -#define TEGRA_AES_INT_ERROR_MASK 0xFFF000 - -#endif From f829e7a32c9434e31e565bc79f5804a7a984c10f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 21 Feb 2014 11:51:31 +0300 Subject: [PATCH 24/60] crypto: caam - writel() arguments are swapped My guess is that this little endian configuration is never found in real life, but if it were then the writel() arguments are in the wrong order so the driver would crash immediately. Signed-off-by: Dan Carpenter Acked-by: Kim Phillips Signed-off-by: Herbert Xu --- drivers/crypto/caam/regs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/caam/regs.h b/drivers/crypto/caam/regs.h index d50174f45b21..cbde8b95a6f8 100644 --- a/drivers/crypto/caam/regs.h +++ b/drivers/crypto/caam/regs.h @@ -74,10 +74,10 @@ #endif #else #ifdef __LITTLE_ENDIAN -#define wr_reg32(reg, data) __raw_writel(reg, data) +#define wr_reg32(reg, data) __raw_writel(data, reg) #define rd_reg32(reg) __raw_readl(reg) #ifdef CONFIG_64BIT -#define wr_reg64(reg, data) __raw_writeq(reg, data) +#define wr_reg64(reg, data) __raw_writeq(data, reg) #define rd_reg64(reg) __raw_readq(reg) #endif #endif From 0611451b4e78ecb77b3323657f2ab4aadd6d28b3 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 24 Feb 2014 08:42:02 -0600 Subject: [PATCH 25/60] crypto: ccp - Prevent a possible lost CCP command request If a CCP command has been queued for processing at the crypto layer then, when dequeueing it for processing, the "can backlog" flag must be set so that the request isn't lost if the CCP backlog queue limit is reached. Signed-off-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-crypto-main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c index 010fded5d46b..9d30d6fbfd3d 100644 --- a/drivers/crypto/ccp/ccp-crypto-main.c +++ b/drivers/crypto/ccp/ccp-crypto-main.c @@ -174,6 +174,10 @@ static void ccp_crypto_complete(void *data, int err) /* Submit the next cmd */ while (held) { + /* Since we have already queued the cmd, we must indicate that + * we can backlog so as not to "lose" this request. + */ + held->cmd->flags |= CCP_CMD_MAY_BACKLOG; ret = ccp_enqueue_cmd(held->cmd); if (ccp_crypto_success(ret)) break; From 950b10bae656d7edf5e2047bf0c9205980f49f2c Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 24 Feb 2014 08:42:08 -0600 Subject: [PATCH 26/60] crypto: ccp - Invoke context callback when there is a backlog error Invoke the callback routine associated with the crypto context if an error is encountered sending the command to the CCP during backlog processing. This is needed to free any resources used by the command. Signed-off-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-crypto-main.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c index 9d30d6fbfd3d..7d98635c2c5e 100644 --- a/drivers/crypto/ccp/ccp-crypto-main.c +++ b/drivers/crypto/ccp/ccp-crypto-main.c @@ -183,6 +183,9 @@ static void ccp_crypto_complete(void *data, int err) break; /* Error occurred, report it and get the next entry */ + ctx = crypto_tfm_ctx(held->req->tfm); + if (ctx->complete) + ret = ctx->complete(held->req, ret); held->req->complete(held->req, ret); next = ccp_crypto_cmd_complete(held, &backlog); From c65a52f8360dc29116395708bde18399e4699f87 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Mon, 24 Feb 2014 08:42:14 -0600 Subject: [PATCH 27/60] crypto: ccp - Account for CCP backlog processing When the crypto layer is able to queue up a command for processing by the CCP on the initial call to ccp_crypto_enqueue_request and the CCP returns -EBUSY, then if the backlog flag is not set the command needs to be freed and not added to the active command list. Signed-off-by: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-crypto-main.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/crypto/ccp/ccp-crypto-main.c b/drivers/crypto/ccp/ccp-crypto-main.c index 7d98635c2c5e..20dc848481e7 100644 --- a/drivers/crypto/ccp/ccp-crypto-main.c +++ b/drivers/crypto/ccp/ccp-crypto-main.c @@ -205,6 +205,7 @@ static int ccp_crypto_enqueue_cmd(struct ccp_crypto_cmd *crypto_cmd) { struct ccp_crypto_cmd *active = NULL, *tmp; unsigned long flags; + bool free_cmd = true; int ret; spin_lock_irqsave(&req_queue_lock, flags); @@ -231,7 +232,10 @@ static int ccp_crypto_enqueue_cmd(struct ccp_crypto_cmd *crypto_cmd) if (!active) { ret = ccp_enqueue_cmd(crypto_cmd->cmd); if (!ccp_crypto_success(ret)) - goto e_lock; + goto e_lock; /* Error, don't queue it */ + if ((ret == -EBUSY) && + !(crypto_cmd->cmd->flags & CCP_CMD_MAY_BACKLOG)) + goto e_lock; /* Not backlogging, don't queue it */ } if (req_queue.cmd_count >= CCP_CRYPTO_MAX_QLEN) { @@ -244,9 +248,14 @@ static int ccp_crypto_enqueue_cmd(struct ccp_crypto_cmd *crypto_cmd) req_queue.cmd_count++; list_add_tail(&crypto_cmd->entry, &req_queue.cmds); + free_cmd = false; + e_lock: spin_unlock_irqrestore(&req_queue_lock, flags); + if (free_cmd) + kfree(crypto_cmd); + return ret; } @@ -262,7 +271,6 @@ int ccp_crypto_enqueue_request(struct crypto_async_request *req, { struct ccp_crypto_cmd *crypto_cmd; gfp_t gfp; - int ret; gfp = req->flags & CRYPTO_TFM_REQ_MAY_SLEEP ? GFP_KERNEL : GFP_ATOMIC; @@ -287,11 +295,7 @@ int ccp_crypto_enqueue_request(struct crypto_async_request *req, else cmd->flags &= ~CCP_CMD_MAY_BACKLOG; - ret = ccp_crypto_enqueue_cmd(crypto_cmd); - if (!ccp_crypto_success(ret)) - kfree(crypto_cmd); - - return ret; + return ccp_crypto_enqueue_cmd(crypto_cmd); } struct scatterlist *ccp_crypto_sg_table_add(struct sg_table *table, From 3d6f1d12f5ed7603caeeb1870174882256bd0889 Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Mon, 10 Mar 2014 20:13:32 +0800 Subject: [PATCH 28/60] crypto: sahara - Use return value of devm_request_irq() on error Signed-off-by: Alexander Shiyan Signed-off-by: Herbert Xu --- drivers/crypto/sahara.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/sahara.c b/drivers/crypto/sahara.c index 894468fdb02d..07a5987ce67d 100644 --- a/drivers/crypto/sahara.c +++ b/drivers/crypto/sahara.c @@ -896,10 +896,11 @@ static int sahara_probe(struct platform_device *pdev) return irq; } - if (devm_request_irq(&pdev->dev, irq, sahara_irq_handler, - 0, SAHARA_NAME, dev) < 0) { + err = devm_request_irq(&pdev->dev, irq, sahara_irq_handler, + 0, dev_name(&pdev->dev), dev); + if (err) { dev_err(&pdev->dev, "failed to request irq\n"); - return -ENOENT; + return err; } /* clocks */ From 4ea5d9998a9de1c85167582d3fd2760cacf40f7d Mon Sep 17 00:00:00 2001 From: Sonic Zhang Date: Wed, 26 Feb 2014 10:39:16 +0800 Subject: [PATCH 29/60] crypt: bfin_crc - Remove useless SSYNC instruction and cache flush to DMA coherent memory 1) SSYNC instruction is blackfin specific and takes no effect in this driver. 2) DMA descriptor and SG middle buffer are in DMA coherent memory. No need to flush. 3) Turn kzalloc, ioremap and request_irq into managed device APIs respectively. Signed-off-by: Sonic Zhang Signed-off-by: Herbert Xu --- drivers/crypto/bfin_crc.c | 45 ++++++++++----------------------------- 1 file changed, 11 insertions(+), 34 deletions(-) diff --git a/drivers/crypto/bfin_crc.c b/drivers/crypto/bfin_crc.c index d797f31f5d85..c9ff298e6d26 100644 --- a/drivers/crypto/bfin_crc.c +++ b/drivers/crypto/bfin_crc.c @@ -139,7 +139,6 @@ static int bfin_crypto_crc_init_hw(struct bfin_crypto_crc *crc, u32 key) /* setup CRC interrupts */ crc->regs->status = CMPERRI | DCNTEXPI; crc->regs->intrenset = CMPERRI | DCNTEXPI; - SSYNC(); return 0; } @@ -285,17 +284,12 @@ static void bfin_crypto_crc_config_dma(struct bfin_crypto_crc *crc) if (i == 0) return; - flush_dcache_range((unsigned int)crc->sg_cpu, - (unsigned int)crc->sg_cpu + - i * sizeof(struct dma_desc_array)); - /* Set the last descriptor to stop mode */ crc->sg_cpu[i - 1].cfg &= ~(DMAFLOW | NDSIZE); crc->sg_cpu[i - 1].cfg |= DI_EN; set_dma_curr_desc_addr(crc->dma_ch, (unsigned long *)crc->sg_dma); set_dma_x_count(crc->dma_ch, 0); set_dma_x_modify(crc->dma_ch, 0); - SSYNC(); set_dma_config(crc->dma_ch, dma_config); } @@ -415,7 +409,6 @@ finish_update: /* finally kick off CRC operation */ crc->regs->control |= BLKEN; - SSYNC(); return -EINPROGRESS; } @@ -539,7 +532,6 @@ static irqreturn_t bfin_crypto_crc_handler(int irq, void *dev_id) if (crc->regs->status & DCNTEXP) { crc->regs->status = DCNTEXP; - SSYNC(); /* prepare results */ put_unaligned_le32(crc->regs->result, crc->req->result); @@ -594,7 +586,7 @@ static int bfin_crypto_crc_probe(struct platform_device *pdev) unsigned int timeout = 100000; int ret; - crc = kzalloc(sizeof(*crc), GFP_KERNEL); + crc = devm_kzalloc(dev, sizeof(*crc), GFP_KERNEL); if (!crc) { dev_err(&pdev->dev, "fail to malloc bfin_crypto_crc\n"); return -ENOMEM; @@ -610,42 +602,39 @@ static int bfin_crypto_crc_probe(struct platform_device *pdev) res = platform_get_resource(pdev, IORESOURCE_MEM, 0); if (res == NULL) { dev_err(&pdev->dev, "Cannot get IORESOURCE_MEM\n"); - ret = -ENOENT; - goto out_error_free_mem; + return -ENOENT; } - crc->regs = ioremap(res->start, resource_size(res)); - if (!crc->regs) { + crc->regs = devm_ioremap_resource(dev, res); + if (IS_ERR((void *)crc->regs)) { dev_err(&pdev->dev, "Cannot map CRC IO\n"); - ret = -ENXIO; - goto out_error_free_mem; + return PTR_ERR((void *)crc->regs); } crc->irq = platform_get_irq(pdev, 0); if (crc->irq < 0) { dev_err(&pdev->dev, "No CRC DCNTEXP IRQ specified\n"); - ret = -ENOENT; - goto out_error_unmap; + return -ENOENT; } - ret = request_irq(crc->irq, bfin_crypto_crc_handler, IRQF_SHARED, dev_name(dev), crc); + ret = devm_request_irq(dev, crc->irq, bfin_crypto_crc_handler, + IRQF_SHARED, dev_name(dev), crc); if (ret) { dev_err(&pdev->dev, "Unable to request blackfin crc irq\n"); - goto out_error_unmap; + return ret; } res = platform_get_resource(pdev, IORESOURCE_DMA, 0); if (res == NULL) { dev_err(&pdev->dev, "No CRC DMA channel specified\n"); - ret = -ENOENT; - goto out_error_irq; + return -ENOENT; } crc->dma_ch = res->start; ret = request_dma(crc->dma_ch, dev_name(dev)); if (ret) { dev_err(&pdev->dev, "Unable to attach Blackfin CRC DMA channel\n"); - goto out_error_irq; + return ret; } crc->sg_cpu = dma_alloc_coherent(&pdev->dev, PAGE_SIZE, &crc->sg_dma, GFP_KERNEL); @@ -660,9 +649,7 @@ static int bfin_crypto_crc_probe(struct platform_device *pdev) crc->sg_mid_buf = (u8 *)(crc->sg_cpu + ((CRC_MAX_DMA_DESC + 1) << 1)); crc->regs->control = 0; - SSYNC(); crc->regs->poly = crc->poly = (u32)pdev->dev.platform_data; - SSYNC(); while (!(crc->regs->status & LUTDONE) && (--timeout) > 0) cpu_relax(); @@ -693,12 +680,6 @@ out_error_dma: if (crc->sg_cpu) dma_free_coherent(&pdev->dev, PAGE_SIZE, crc->sg_cpu, crc->sg_dma); free_dma(crc->dma_ch); -out_error_irq: - free_irq(crc->irq, crc); -out_error_unmap: - iounmap((void *)crc->regs); -out_error_free_mem: - kfree(crc); return ret; } @@ -721,10 +702,6 @@ static int bfin_crypto_crc_remove(struct platform_device *pdev) crypto_unregister_ahash(&algs); tasklet_kill(&crc->done_task); free_dma(crc->dma_ch); - if (crc->irq > 0) - free_irq(crc->irq, crc); - iounmap((void *)crc->regs); - kfree(crc); return 0; } From 0c0becd0269240cd832b7aac16fb9b48d067520e Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Thu, 27 Feb 2014 14:00:09 +0900 Subject: [PATCH 30/60] hwrng: atmel - Use devm_clk_get() Use devm_clk_get() to make cleanup paths simpler. Signed-off-by: Jingoo Han Acked-by: Peter Korsgaard Acked-by: Nicolas Ferre Signed-off-by: Herbert Xu --- drivers/char/hw_random/atmel-rng.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/char/hw_random/atmel-rng.c b/drivers/char/hw_random/atmel-rng.c index dfeddf2c00b7..851bc7e20ad2 100644 --- a/drivers/char/hw_random/atmel-rng.c +++ b/drivers/char/hw_random/atmel-rng.c @@ -63,13 +63,13 @@ static int atmel_trng_probe(struct platform_device *pdev) if (IS_ERR(trng->base)) return PTR_ERR(trng->base); - trng->clk = clk_get(&pdev->dev, NULL); + trng->clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(trng->clk)) return PTR_ERR(trng->clk); ret = clk_enable(trng->clk); if (ret) - goto err_enable; + return ret; writel(TRNG_KEY | 1, trng->base + TRNG_CR); trng->rng.name = pdev->name; @@ -85,9 +85,6 @@ static int atmel_trng_probe(struct platform_device *pdev) err_register: clk_disable(trng->clk); -err_enable: - clk_put(trng->clk); - return ret; } @@ -99,7 +96,6 @@ static int atmel_trng_remove(struct platform_device *pdev) writel(TRNG_KEY, trng->base + TRNG_CR); clk_disable(trng->clk); - clk_put(trng->clk); return 0; } From 0574bce9691f238f3351b28c27de337d4519d595 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Thu, 27 Feb 2014 14:02:24 +0900 Subject: [PATCH 31/60] hwrng: omap3-rom - Use devm_clk_get() Use devm_clk_get() to make cleanup paths simpler. Signed-off-by: Jingoo Han Signed-off-by: Herbert Xu --- drivers/char/hw_random/omap3-rom-rng.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c index c853e9e68573..6f2eaffed623 100644 --- a/drivers/char/hw_random/omap3-rom-rng.c +++ b/drivers/char/hw_random/omap3-rom-rng.c @@ -103,7 +103,7 @@ static int omap3_rom_rng_probe(struct platform_device *pdev) } setup_timer(&idle_timer, omap3_rom_rng_idle, 0); - rng_clk = clk_get(&pdev->dev, "ick"); + rng_clk = devm_clk_get(&pdev->dev, "ick"); if (IS_ERR(rng_clk)) { pr_err("unable to get RNG clock\n"); return PTR_ERR(rng_clk); @@ -120,7 +120,6 @@ static int omap3_rom_rng_remove(struct platform_device *pdev) { hwrng_unregister(&omap3_rom_rng_ops); clk_disable_unprepare(rng_clk); - clk_put(rng_clk); return 0; } From 0c0aa8446433849f19761ebb7cd2f58fde387a04 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Thu, 27 Feb 2014 14:04:28 +0900 Subject: [PATCH 32/60] hwrng: pixocell - Use devm_clk_get() Use devm_clk_get() to make cleanup paths simpler. Signed-off-by: Jingoo Han Signed-off-by: Herbert Xu --- drivers/char/hw_random/picoxcell-rng.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/char/hw_random/picoxcell-rng.c b/drivers/char/hw_random/picoxcell-rng.c index c03beeeb1aea..eab5448ad56f 100644 --- a/drivers/char/hw_random/picoxcell-rng.c +++ b/drivers/char/hw_random/picoxcell-rng.c @@ -108,7 +108,7 @@ static int picoxcell_trng_probe(struct platform_device *pdev) if (IS_ERR(rng_base)) return PTR_ERR(rng_base); - rng_clk = clk_get(&pdev->dev, NULL); + rng_clk = devm_clk_get(&pdev->dev, NULL); if (IS_ERR(rng_clk)) { dev_warn(&pdev->dev, "no clk\n"); return PTR_ERR(rng_clk); @@ -117,7 +117,7 @@ static int picoxcell_trng_probe(struct platform_device *pdev) ret = clk_enable(rng_clk); if (ret) { dev_warn(&pdev->dev, "unable to enable clk\n"); - goto err_enable; + return ret; } picoxcell_trng_start(); @@ -132,9 +132,6 @@ static int picoxcell_trng_probe(struct platform_device *pdev) err_register: clk_disable(rng_clk); -err_enable: - clk_put(rng_clk); - return ret; } @@ -142,7 +139,6 @@ static int picoxcell_trng_remove(struct platform_device *pdev) { hwrng_unregister(&picoxcell_trng); clk_disable(rng_clk); - clk_put(rng_clk); return 0; } From 1655c24095da0b13bbd7a80b25948b19f6efdb37 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Thu, 27 Feb 2014 14:06:31 +0900 Subject: [PATCH 33/60] hwrng: nomadik - Use devm_*() functions Use devm_*() functions to make cleanup paths simpler. Signed-off-by: Jingoo Han Reviewed-by: Linus Walleij Signed-off-by: Herbert Xu --- drivers/char/hw_random/nomadik-rng.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/drivers/char/hw_random/nomadik-rng.c b/drivers/char/hw_random/nomadik-rng.c index 232b87fb5fc9..bc904bb3f8a2 100644 --- a/drivers/char/hw_random/nomadik-rng.c +++ b/drivers/char/hw_random/nomadik-rng.c @@ -44,7 +44,7 @@ static int nmk_rng_probe(struct amba_device *dev, const struct amba_id *id) void __iomem *base; int ret; - rng_clk = clk_get(&dev->dev, NULL); + rng_clk = devm_clk_get(&dev->dev, NULL); if (IS_ERR(rng_clk)) { dev_err(&dev->dev, "could not get rng clock\n"); ret = PTR_ERR(rng_clk); @@ -57,33 +57,28 @@ static int nmk_rng_probe(struct amba_device *dev, const struct amba_id *id) if (ret) goto out_clk; ret = -ENOMEM; - base = ioremap(dev->res.start, resource_size(&dev->res)); + base = devm_ioremap(&dev->dev, dev->res.start, + resource_size(&dev->res)); if (!base) goto out_release; nmk_rng.priv = (unsigned long)base; ret = hwrng_register(&nmk_rng); if (ret) - goto out_unmap; + goto out_release; return 0; -out_unmap: - iounmap(base); out_release: amba_release_regions(dev); out_clk: clk_disable(rng_clk); - clk_put(rng_clk); return ret; } static int nmk_rng_remove(struct amba_device *dev) { - void __iomem *base = (void __iomem *)nmk_rng.priv; hwrng_unregister(&nmk_rng); - iounmap(base); amba_release_regions(dev); clk_disable(rng_clk); - clk_put(rng_clk); return 0; } From 93b7f9c92840102da43aafc41dd943926826269e Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Thu, 27 Feb 2014 14:07:52 +0900 Subject: [PATCH 34/60] hwrng: timeriomem - Use devm_*() functions Use devm_*() functions to make cleanup paths simpler. Signed-off-by: Jingoo Han Signed-off-by: Herbert Xu --- drivers/char/hw_random/timeriomem-rng.c | 40 +++++++------------------ 1 file changed, 10 insertions(+), 30 deletions(-) diff --git a/drivers/char/hw_random/timeriomem-rng.c b/drivers/char/hw_random/timeriomem-rng.c index 73ce739f8e19..439ff8b28c43 100644 --- a/drivers/char/hw_random/timeriomem-rng.c +++ b/drivers/char/hw_random/timeriomem-rng.c @@ -118,7 +118,8 @@ static int timeriomem_rng_probe(struct platform_device *pdev) } /* Allocate memory for the device structure (and zero it) */ - priv = kzalloc(sizeof(struct timeriomem_rng_private_data), GFP_KERNEL); + priv = devm_kzalloc(&pdev->dev, + sizeof(struct timeriomem_rng_private_data), GFP_KERNEL); if (!priv) { dev_err(&pdev->dev, "failed to allocate device structure.\n"); return -ENOMEM; @@ -134,17 +135,16 @@ static int timeriomem_rng_probe(struct platform_device *pdev) period = i; else { dev_err(&pdev->dev, "missing period\n"); - err = -EINVAL; - goto out_free; + return -EINVAL; } - } else + } else { period = pdata->period; + } priv->period = usecs_to_jiffies(period); if (priv->period < 1) { dev_err(&pdev->dev, "period is less than one jiffy\n"); - err = -EINVAL; - goto out_free; + return -EINVAL; } priv->expires = jiffies; @@ -160,24 +160,16 @@ static int timeriomem_rng_probe(struct platform_device *pdev) priv->timeriomem_rng_ops.data_read = timeriomem_rng_data_read; priv->timeriomem_rng_ops.priv = (unsigned long)priv; - if (!request_mem_region(res->start, resource_size(res), - dev_name(&pdev->dev))) { - dev_err(&pdev->dev, "request_mem_region failed\n"); - err = -EBUSY; + priv->io_base = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(priv->io_base)) { + err = PTR_ERR(priv->io_base); goto out_timer; } - priv->io_base = ioremap(res->start, resource_size(res)); - if (priv->io_base == NULL) { - dev_err(&pdev->dev, "ioremap failed\n"); - err = -EIO; - goto out_release_io; - } - err = hwrng_register(&priv->timeriomem_rng_ops); if (err) { dev_err(&pdev->dev, "problem registering\n"); - goto out; + goto out_timer; } dev_info(&pdev->dev, "32bits from 0x%p @ %dus\n", @@ -185,30 +177,18 @@ static int timeriomem_rng_probe(struct platform_device *pdev) return 0; -out: - iounmap(priv->io_base); -out_release_io: - release_mem_region(res->start, resource_size(res)); out_timer: del_timer_sync(&priv->timer); -out_free: - kfree(priv); return err; } static int timeriomem_rng_remove(struct platform_device *pdev) { struct timeriomem_rng_private_data *priv = platform_get_drvdata(pdev); - struct resource *res; - - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); hwrng_unregister(&priv->timeriomem_rng_ops); del_timer_sync(&priv->timer); - iounmap(priv->io_base); - release_mem_region(res->start, resource_size(res)); - kfree(priv); return 0; } From ea7b284398984d9934e12470267a72fd663ac145 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Thu, 27 Feb 2014 20:31:38 +0900 Subject: [PATCH 35/60] crypto: omap-aes - Use SIMPLE_DEV_PM_OPS macro Use SIMPLE_DEV_PM_OPS macro in order to make the code simpler. Signed-off-by: Jingoo Han Acked-by: Nishanth Menon Signed-off-by: Herbert Xu --- drivers/crypto/omap-aes.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/crypto/omap-aes.c b/drivers/crypto/omap-aes.c index dde41f1df608..cb98fa54573d 100644 --- a/drivers/crypto/omap-aes.c +++ b/drivers/crypto/omap-aes.c @@ -1307,9 +1307,7 @@ static int omap_aes_resume(struct device *dev) } #endif -static const struct dev_pm_ops omap_aes_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(omap_aes_suspend, omap_aes_resume) -}; +static SIMPLE_DEV_PM_OPS(omap_aes_pm_ops, omap_aes_suspend, omap_aes_resume); static struct platform_driver omap_aes_driver = { .probe = omap_aes_probe, From e78f91932d6005adc41e4244294fca66cdbf4d90 Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Thu, 27 Feb 2014 20:32:35 +0900 Subject: [PATCH 36/60] crypto: omap-des - Use SIMPLE_DEV_PM_OPS macro Use SIMPLE_DEV_PM_OPS macro in order to make the code simpler. Signed-off-by: Jingoo Han Signed-off-by: Herbert Xu --- drivers/crypto/omap-des.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c index 006d9144485f..758919e23fdb 100644 --- a/drivers/crypto/omap-des.c +++ b/drivers/crypto/omap-des.c @@ -1196,9 +1196,7 @@ static int omap_des_resume(struct device *dev) } #endif -static const struct dev_pm_ops omap_des_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(omap_des_suspend, omap_des_resume) -}; +static SIMPLE_DEV_PM_OPS(omap_des_pm_ops, omap_des_suspend, omap_des_resume); static struct platform_driver omap_des_driver = { .probe = omap_des_probe, From ae12fe288559417ac69fd0d5ab976dc4612a768b Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Thu, 27 Feb 2014 20:33:32 +0900 Subject: [PATCH 37/60] crypto: omap-sham - Use SIMPLE_DEV_PM_OPS macro Use SIMPLE_DEV_PM_OPS macro in order to make the code simpler. Signed-off-by: Jingoo Han Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index a727a6a59653..4e2067df300d 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -2022,9 +2022,7 @@ static int omap_sham_resume(struct device *dev) } #endif -static const struct dev_pm_ops omap_sham_pm_ops = { - SET_SYSTEM_SLEEP_PM_OPS(omap_sham_suspend, omap_sham_resume) -}; +static SIMPLE_DEV_PM_OPS(omap_sham_pm_ops, omap_sham_suspend, omap_sham_resume); static struct platform_driver omap_sham_driver = { .probe = omap_sham_probe, From 26f25b2695aa0996aa01e86a212db94e8dd2006d Mon Sep 17 00:00:00 2001 From: Jingoo Han Date: Thu, 27 Feb 2014 20:34:36 +0900 Subject: [PATCH 38/60] crypto: omap-des - make local functions static Make omap_des_copy_needed(), omap_des_copy_sgs(), because these functions are used only in this file. Signed-off-by: Jingoo Han Acked-by: Joel Fernandes Signed-off-by: Herbert Xu --- drivers/crypto/omap-des.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c index 758919e23fdb..ec5f13162b73 100644 --- a/drivers/crypto/omap-des.c +++ b/drivers/crypto/omap-des.c @@ -535,7 +535,7 @@ static int omap_des_crypt_dma_stop(struct omap_des_dev *dd) return err; } -int omap_des_copy_needed(struct scatterlist *sg) +static int omap_des_copy_needed(struct scatterlist *sg) { while (sg) { if (!IS_ALIGNED(sg->offset, 4)) @@ -547,7 +547,7 @@ int omap_des_copy_needed(struct scatterlist *sg) return 0; } -int omap_des_copy_sgs(struct omap_des_dev *dd) +static int omap_des_copy_sgs(struct omap_des_dev *dd) { void *buf_in, *buf_out; int pages; From 1a7c685611713011179a0e92b06f43a378d3a8fd Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 3 Mar 2014 01:23:15 +0100 Subject: [PATCH 39/60] crypto: mxs-dcp - Align the bounce buffers The DCP needs the bounce buffers, DMA descriptors and result buffers aligned to 64 bytes (yet another hardware limitation). Make sure they are aligned by properly aligning the structure which contains them during allocation. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Fabio Estevam Cc: Herbert Xu Cc: Shawn Guo Cc: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/mxs-dcp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index 08761d61d4f5..c7400fe9522c 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -29,6 +29,8 @@ #define DCP_MAX_CHANS 4 #define DCP_BUF_SZ PAGE_SIZE +#define DCP_ALIGNMENT 64 + /* DCP DMA descriptor. */ struct dcp_dma_desc { uint32_t next_cmd_addr; @@ -947,12 +949,16 @@ static int mxs_dcp_probe(struct platform_device *pdev) } /* Allocate coherent helper block. */ - sdcp->coh = devm_kzalloc(dev, sizeof(*sdcp->coh), GFP_KERNEL); + sdcp->coh = devm_kzalloc(dev, sizeof(*sdcp->coh) + DCP_ALIGNMENT, + GFP_KERNEL); if (!sdcp->coh) { ret = -ENOMEM; goto err_mutex; } + /* Re-align the structure so it fits the DCP constraints. */ + sdcp->coh = PTR_ALIGN(sdcp->coh, DCP_ALIGNMENT); + /* Restart the DCP block. */ ret = stmp_reset_block(sdcp->base); if (ret) From 04d088cc0b19a4cb14680b92205fd4600470c46f Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Mon, 3 Mar 2014 13:40:30 +0100 Subject: [PATCH 40/60] crypto: mxs-dcp - Optimize hashing Optimize the hashing operation in the MXS-DCP by doing two adjustments: 1) Given that the output buffer for the hash is now always correctly aligned, we can just use the buffer for the DCP DMA to store the resulting hash. We thus get rid of one copying of data. Moreover, we remove an entry from dcp_coherent_block{} and thus lower the memory footprint of the driver. 2) We map the output buffer for the hash for DMA only in case we will output the hash, not always, as it was now. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Fabio Estevam Cc: Herbert Xu Cc: Shawn Guo Cc: Tom Lendacky Signed-off-by: Herbert Xu --- drivers/crypto/mxs-dcp.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/drivers/crypto/mxs-dcp.c b/drivers/crypto/mxs-dcp.c index c7400fe9522c..7bbe0ab21eca 100644 --- a/drivers/crypto/mxs-dcp.c +++ b/drivers/crypto/mxs-dcp.c @@ -50,7 +50,6 @@ struct dcp_coherent_block { uint8_t sha_in_buf[DCP_BUF_SZ]; uint8_t aes_key[2 * AES_KEYSIZE_128]; - uint8_t sha_digest[SHA256_DIGEST_SIZE]; struct dcp_dma_desc desc[DCP_MAX_CHANS]; }; @@ -516,13 +515,11 @@ static int mxs_dcp_run_sha(struct ahash_request *req) struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); struct dcp_async_ctx *actx = crypto_ahash_ctx(tfm); struct dcp_sha_req_ctx *rctx = ahash_request_ctx(req); + struct hash_alg_common *halg = crypto_hash_alg_common(tfm); struct dcp_dma_desc *desc = &sdcp->coh->desc[actx->chan]; - dma_addr_t digest_phys = dma_map_single(sdcp->dev, - sdcp->coh->sha_digest, - SHA256_DIGEST_SIZE, - DMA_FROM_DEVICE); + dma_addr_t digest_phys = 0; dma_addr_t buf_phys = dma_map_single(sdcp->dev, sdcp->coh->sha_in_buf, DCP_BUF_SZ, DMA_TO_DEVICE); @@ -543,14 +540,18 @@ static int mxs_dcp_run_sha(struct ahash_request *req) /* Set HASH_TERM bit for last transfer block. */ if (rctx->fini) { + digest_phys = dma_map_single(sdcp->dev, req->result, + halg->digestsize, DMA_FROM_DEVICE); desc->control0 |= MXS_DCP_CONTROL0_HASH_TERM; desc->payload = digest_phys; } ret = mxs_dcp_start_dma(actx); - dma_unmap_single(sdcp->dev, digest_phys, SHA256_DIGEST_SIZE, - DMA_FROM_DEVICE); + if (rctx->fini) + dma_unmap_single(sdcp->dev, digest_phys, halg->digestsize, + DMA_FROM_DEVICE); + dma_unmap_single(sdcp->dev, buf_phys, DCP_BUF_SZ, DMA_TO_DEVICE); return ret; @@ -567,7 +568,6 @@ static int dcp_sha_req_to_buf(struct crypto_async_request *arq) struct hash_alg_common *halg = crypto_hash_alg_common(tfm); const int nents = sg_nents(req->src); - uint8_t *digest = sdcp->coh->sha_digest; uint8_t *in_buf = sdcp->coh->sha_in_buf; uint8_t *src_buf; @@ -614,14 +614,20 @@ static int dcp_sha_req_to_buf(struct crypto_async_request *arq) rctx->fini = 1; /* Submit whatever is left. */ + if (!req->result) + return -EINVAL; + ret = mxs_dcp_run_sha(req); - if (ret || !req->result) + if (ret) return ret; + actx->fill = 0; /* For some reason, the result is flipped. */ - for (i = 0; i < halg->digestsize; i++) - req->result[i] = digest[halg->digestsize - i - 1]; + for (i = 0; i < halg->digestsize / 2; i++) { + swap(req->result[i], + req->result[halg->digestsize - i - 1]); + } } return 0; From d9e79726193346569af7953369a638ee2275ade5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 3 Mar 2014 15:51:48 -0800 Subject: [PATCH 41/60] hwrng: add randomness to system from rng sources When bringing a new RNG source online, it seems like it would make sense to use some of its bytes to make the system entropy pool more random, as done with all sorts of other devices that contain per-device or per-boot differences. Signed-off-by: Kees Cook Reviewed-by: Jason Cooper Signed-off-by: Herbert Xu --- drivers/char/hw_random/core.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index cf49f1c88f01..7ac96fbc06ed 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -41,6 +41,7 @@ #include #include #include +#include #include @@ -304,6 +305,8 @@ int hwrng_register(struct hwrng *rng) { int err = -EINVAL; struct hwrng *old_rng, *tmp; + unsigned char bytes[16]; + int bytes_read; if (rng->name == NULL || (rng->data_read == NULL && rng->read == NULL)) @@ -344,6 +347,10 @@ int hwrng_register(struct hwrng *rng) } INIT_LIST_HEAD(&rng->list); list_add_tail(&rng->list, &rng_list); + + bytes_read = rng_get_data(rng, bytes, sizeof(bytes), 1); + if (bytes_read > 0) + add_device_randomness(bytes, bytes_read); out_unlock: mutex_unlock(&rng_mutex); out: From 822be00fe67105a90e536df52d1e4d688f34b5b2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 4 Mar 2014 13:28:38 +0800 Subject: [PATCH 42/60] crypto: remove direct blkcipher_walk dependency on transform In order to allow other uses of the blkcipher walk API than the blkcipher algos themselves, this patch copies some of the transform data members to the walk struct so the transform is only accessed at walk init time. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/blkcipher.c | 67 ++++++++++++++++++++--------------------- include/crypto/algapi.h | 5 ++- 2 files changed, 37 insertions(+), 35 deletions(-) diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c index a79e7e9ab86e..46fdab5e9cc7 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -70,14 +70,12 @@ static inline u8 *blkcipher_get_spot(u8 *start, unsigned int len) return max(start, end_page); } -static inline unsigned int blkcipher_done_slow(struct crypto_blkcipher *tfm, - struct blkcipher_walk *walk, +static inline unsigned int blkcipher_done_slow(struct blkcipher_walk *walk, unsigned int bsize) { u8 *addr; - unsigned int alignmask = crypto_blkcipher_alignmask(tfm); - addr = (u8 *)ALIGN((unsigned long)walk->buffer, alignmask + 1); + addr = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1); addr = blkcipher_get_spot(addr, bsize); scatterwalk_copychunks(addr, &walk->out, bsize, 1); return bsize; @@ -105,7 +103,6 @@ static inline unsigned int blkcipher_done_fast(struct blkcipher_walk *walk, int blkcipher_walk_done(struct blkcipher_desc *desc, struct blkcipher_walk *walk, int err) { - struct crypto_blkcipher *tfm = desc->tfm; unsigned int nbytes = 0; if (likely(err >= 0)) { @@ -117,7 +114,7 @@ int blkcipher_walk_done(struct blkcipher_desc *desc, err = -EINVAL; goto err; } else - n = blkcipher_done_slow(tfm, walk, n); + n = blkcipher_done_slow(walk, n); nbytes = walk->total - n; err = 0; @@ -136,7 +133,7 @@ err: } if (walk->iv != desc->info) - memcpy(desc->info, walk->iv, crypto_blkcipher_ivsize(tfm)); + memcpy(desc->info, walk->iv, walk->ivsize); if (walk->buffer != walk->page) kfree(walk->buffer); if (walk->page) @@ -226,22 +223,20 @@ static inline int blkcipher_next_fast(struct blkcipher_desc *desc, static int blkcipher_walk_next(struct blkcipher_desc *desc, struct blkcipher_walk *walk) { - struct crypto_blkcipher *tfm = desc->tfm; - unsigned int alignmask = crypto_blkcipher_alignmask(tfm); unsigned int bsize; unsigned int n; int err; n = walk->total; - if (unlikely(n < crypto_blkcipher_blocksize(tfm))) { + if (unlikely(n < walk->cipher_blocksize)) { desc->flags |= CRYPTO_TFM_RES_BAD_BLOCK_LEN; return blkcipher_walk_done(desc, walk, -EINVAL); } walk->flags &= ~(BLKCIPHER_WALK_SLOW | BLKCIPHER_WALK_COPY | BLKCIPHER_WALK_DIFF); - if (!scatterwalk_aligned(&walk->in, alignmask) || - !scatterwalk_aligned(&walk->out, alignmask)) { + if (!scatterwalk_aligned(&walk->in, walk->alignmask) || + !scatterwalk_aligned(&walk->out, walk->alignmask)) { walk->flags |= BLKCIPHER_WALK_COPY; if (!walk->page) { walk->page = (void *)__get_free_page(GFP_ATOMIC); @@ -250,12 +245,12 @@ static int blkcipher_walk_next(struct blkcipher_desc *desc, } } - bsize = min(walk->blocksize, n); + bsize = min(walk->walk_blocksize, n); n = scatterwalk_clamp(&walk->in, n); n = scatterwalk_clamp(&walk->out, n); if (unlikely(n < bsize)) { - err = blkcipher_next_slow(desc, walk, bsize, alignmask); + err = blkcipher_next_slow(desc, walk, bsize, walk->alignmask); goto set_phys_lowmem; } @@ -277,28 +272,26 @@ set_phys_lowmem: return err; } -static inline int blkcipher_copy_iv(struct blkcipher_walk *walk, - struct crypto_blkcipher *tfm, - unsigned int alignmask) +static inline int blkcipher_copy_iv(struct blkcipher_walk *walk) { - unsigned bs = walk->blocksize; - unsigned int ivsize = crypto_blkcipher_ivsize(tfm); - unsigned aligned_bs = ALIGN(bs, alignmask + 1); - unsigned int size = aligned_bs * 2 + ivsize + max(aligned_bs, ivsize) - - (alignmask + 1); + unsigned bs = walk->walk_blocksize; + unsigned aligned_bs = ALIGN(bs, walk->alignmask + 1); + unsigned int size = aligned_bs * 2 + + walk->ivsize + max(aligned_bs, walk->ivsize) - + (walk->alignmask + 1); u8 *iv; - size += alignmask & ~(crypto_tfm_ctx_alignment() - 1); + size += walk->alignmask & ~(crypto_tfm_ctx_alignment() - 1); walk->buffer = kmalloc(size, GFP_ATOMIC); if (!walk->buffer) return -ENOMEM; - iv = (u8 *)ALIGN((unsigned long)walk->buffer, alignmask + 1); + iv = (u8 *)ALIGN((unsigned long)walk->buffer, walk->alignmask + 1); iv = blkcipher_get_spot(iv, bs) + aligned_bs; iv = blkcipher_get_spot(iv, bs) + aligned_bs; - iv = blkcipher_get_spot(iv, ivsize); + iv = blkcipher_get_spot(iv, walk->ivsize); - walk->iv = memcpy(iv, walk->iv, ivsize); + walk->iv = memcpy(iv, walk->iv, walk->ivsize); return 0; } @@ -306,7 +299,10 @@ int blkcipher_walk_virt(struct blkcipher_desc *desc, struct blkcipher_walk *walk) { walk->flags &= ~BLKCIPHER_WALK_PHYS; - walk->blocksize = crypto_blkcipher_blocksize(desc->tfm); + walk->walk_blocksize = crypto_blkcipher_blocksize(desc->tfm); + walk->cipher_blocksize = walk->walk_blocksize; + walk->ivsize = crypto_blkcipher_ivsize(desc->tfm); + walk->alignmask = crypto_blkcipher_alignmask(desc->tfm); return blkcipher_walk_first(desc, walk); } EXPORT_SYMBOL_GPL(blkcipher_walk_virt); @@ -315,7 +311,10 @@ int blkcipher_walk_phys(struct blkcipher_desc *desc, struct blkcipher_walk *walk) { walk->flags |= BLKCIPHER_WALK_PHYS; - walk->blocksize = crypto_blkcipher_blocksize(desc->tfm); + walk->walk_blocksize = crypto_blkcipher_blocksize(desc->tfm); + walk->cipher_blocksize = walk->walk_blocksize; + walk->ivsize = crypto_blkcipher_ivsize(desc->tfm); + walk->alignmask = crypto_blkcipher_alignmask(desc->tfm); return blkcipher_walk_first(desc, walk); } EXPORT_SYMBOL_GPL(blkcipher_walk_phys); @@ -323,9 +322,6 @@ EXPORT_SYMBOL_GPL(blkcipher_walk_phys); static int blkcipher_walk_first(struct blkcipher_desc *desc, struct blkcipher_walk *walk) { - struct crypto_blkcipher *tfm = desc->tfm; - unsigned int alignmask = crypto_blkcipher_alignmask(tfm); - if (WARN_ON_ONCE(in_irq())) return -EDEADLK; @@ -335,8 +331,8 @@ static int blkcipher_walk_first(struct blkcipher_desc *desc, walk->buffer = NULL; walk->iv = desc->info; - if (unlikely(((unsigned long)walk->iv & alignmask))) { - int err = blkcipher_copy_iv(walk, tfm, alignmask); + if (unlikely(((unsigned long)walk->iv & walk->alignmask))) { + int err = blkcipher_copy_iv(walk); if (err) return err; } @@ -353,7 +349,10 @@ int blkcipher_walk_virt_block(struct blkcipher_desc *desc, unsigned int blocksize) { walk->flags &= ~BLKCIPHER_WALK_PHYS; - walk->blocksize = blocksize; + walk->walk_blocksize = blocksize; + walk->cipher_blocksize = crypto_blkcipher_blocksize(desc->tfm); + walk->ivsize = crypto_blkcipher_ivsize(desc->tfm); + walk->alignmask = crypto_blkcipher_alignmask(desc->tfm); return blkcipher_walk_first(desc, walk); } EXPORT_SYMBOL_GPL(blkcipher_walk_virt_block); diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index e73c19e90e38..d9d14a0f0653 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -100,9 +100,12 @@ struct blkcipher_walk { void *page; u8 *buffer; u8 *iv; + unsigned int ivsize; int flags; - unsigned int blocksize; + unsigned int walk_blocksize; + unsigned int cipher_blocksize; + unsigned int alignmask; }; struct ablkcipher_walk { From 4f7f1d7cff8f2c170ce0319eb4c01a82c328d34f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 4 Mar 2014 13:28:39 +0800 Subject: [PATCH 43/60] crypto: allow blkcipher walks over AEAD data This adds the function blkcipher_aead_walk_virt_block, which allows the caller to use the blkcipher walk API to handle the input and output scatterlists. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- crypto/blkcipher.c | 14 ++++++++++++++ include/crypto/algapi.h | 4 ++++ 2 files changed, 18 insertions(+) diff --git a/crypto/blkcipher.c b/crypto/blkcipher.c index 46fdab5e9cc7..0122bec38564 100644 --- a/crypto/blkcipher.c +++ b/crypto/blkcipher.c @@ -357,6 +357,20 @@ int blkcipher_walk_virt_block(struct blkcipher_desc *desc, } EXPORT_SYMBOL_GPL(blkcipher_walk_virt_block); +int blkcipher_aead_walk_virt_block(struct blkcipher_desc *desc, + struct blkcipher_walk *walk, + struct crypto_aead *tfm, + unsigned int blocksize) +{ + walk->flags &= ~BLKCIPHER_WALK_PHYS; + walk->walk_blocksize = blocksize; + walk->cipher_blocksize = crypto_aead_blocksize(tfm); + walk->ivsize = crypto_aead_ivsize(tfm); + walk->alignmask = crypto_aead_alignmask(tfm); + return blkcipher_walk_first(desc, walk); +} +EXPORT_SYMBOL_GPL(blkcipher_aead_walk_virt_block); + static int setkey_unaligned(struct crypto_tfm *tfm, const u8 *key, unsigned int keylen) { diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h index d9d14a0f0653..016c2f110f63 100644 --- a/include/crypto/algapi.h +++ b/include/crypto/algapi.h @@ -195,6 +195,10 @@ int blkcipher_walk_phys(struct blkcipher_desc *desc, int blkcipher_walk_virt_block(struct blkcipher_desc *desc, struct blkcipher_walk *walk, unsigned int blocksize); +int blkcipher_aead_walk_virt_block(struct blkcipher_desc *desc, + struct blkcipher_walk *walk, + struct crypto_aead *tfm, + unsigned int blocksize); int ablkcipher_walk_done(struct ablkcipher_request *req, struct ablkcipher_walk *walk, int err); From 6e4e603a9a99c6e27a74c1a813a7c751d85a721d Mon Sep 17 00:00:00 2001 From: Nitesh Lal Date: Fri, 7 Mar 2014 16:06:08 +0530 Subject: [PATCH 44/60] crypto: caam - Dynamic memory allocation for caam_rng_ctx object This patch allocates memory from DMAable region to the caam_rng_ctx object, earlier it had been statically allocated which resulted in errorneous behaviour on inserting the caamrng module at the runtime. Signed-off-by: Nitesh Lal Acked-by: Ruchika Gupta Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamrng.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c index 28486b19fc36..403d8d599b5f 100644 --- a/drivers/crypto/caam/caamrng.c +++ b/drivers/crypto/caam/caamrng.c @@ -76,7 +76,7 @@ struct caam_rng_ctx { struct buf_data bufs[2]; }; -static struct caam_rng_ctx rng_ctx; +static struct caam_rng_ctx *rng_ctx; static inline void rng_unmap_buf(struct device *jrdev, struct buf_data *bd) { @@ -137,7 +137,7 @@ static inline int submit_job(struct caam_rng_ctx *ctx, int to_current) static int caam_read(struct hwrng *rng, void *data, size_t max, bool wait) { - struct caam_rng_ctx *ctx = &rng_ctx; + struct caam_rng_ctx *ctx = rng_ctx; struct buf_data *bd = &ctx->bufs[ctx->current_buf]; int next_buf_idx, copied_idx; int err; @@ -237,12 +237,12 @@ static void caam_cleanup(struct hwrng *rng) struct buf_data *bd; for (i = 0; i < 2; i++) { - bd = &rng_ctx.bufs[i]; + bd = &rng_ctx->bufs[i]; if (atomic_read(&bd->empty) == BUF_PENDING) wait_for_completion(&bd->filled); } - rng_unmap_ctx(&rng_ctx); + rng_unmap_ctx(rng_ctx); } static void caam_init_buf(struct caam_rng_ctx *ctx, int buf_id) @@ -273,8 +273,9 @@ static struct hwrng caam_rng = { static void __exit caam_rng_exit(void) { - caam_jr_free(rng_ctx.jrdev); + caam_jr_free(rng_ctx->jrdev); hwrng_unregister(&caam_rng); + kfree(rng_ctx); } static int __init caam_rng_init(void) @@ -286,7 +287,9 @@ static int __init caam_rng_init(void) pr_err("Job Ring Device allocation for transform failed\n"); return PTR_ERR(dev); } - + rng_ctx = kmalloc(sizeof(struct caam_rng_ctx), GFP_DMA); + if (!rng_ctx) + return -ENOMEM; caam_init_rng(&rng_ctx, dev); dev_info(dev, "registering rng-caam\n"); From 26a05489ee0eb2a69b729438e63b1038b472fa57 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Fri, 7 Mar 2014 10:28:46 -0600 Subject: [PATCH 45/60] crypto: omap-sham - Map SG pages if they are HIGHMEM before accessing HIGHMEM pages may not be mapped so we must kmap them before accessing. This resolves a random OOPs error that was showing up during OpenSSL SHA tests. Signed-off-by: Joel Fernandes Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index 4e2067df300d..710d86386965 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -636,11 +636,17 @@ static size_t omap_sham_append_buffer(struct omap_sham_reqctx *ctx, static size_t omap_sham_append_sg(struct omap_sham_reqctx *ctx) { size_t count; + const u8 *vaddr; while (ctx->sg) { + vaddr = kmap_atomic(sg_page(ctx->sg)); + count = omap_sham_append_buffer(ctx, - sg_virt(ctx->sg) + ctx->offset, + vaddr + ctx->offset, ctx->sg->length - ctx->offset); + + kunmap_atomic((void *)vaddr); + if (!count) break; ctx->offset += count; From 85e0da925b2ddbf39a6cefb4e02ad51d0a0912c0 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 21 Mar 2014 21:32:54 +0800 Subject: [PATCH 46/60] crypto: caam - Fix first parameter to caam_init_rng Found by the kbuild test robot, the first argument to caam_init_rng has a spurious ampersand. Reported-by: kbuild test robot Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamrng.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/crypto/caam/caamrng.c b/drivers/crypto/caam/caamrng.c index 403d8d599b5f..3529b54048c9 100644 --- a/drivers/crypto/caam/caamrng.c +++ b/drivers/crypto/caam/caamrng.c @@ -290,7 +290,7 @@ static int __init caam_rng_init(void) rng_ctx = kmalloc(sizeof(struct caam_rng_ctx), GFP_DMA); if (!rng_ctx) return -ENOMEM; - caam_init_rng(&rng_ctx, dev); + caam_init_rng(rng_ctx, dev); dev_info(dev, "registering rng-caam\n"); return hwrng_register(&caam_rng); From ab6bf4e5e5e4298e8649e635bee25542cccbfd97 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 14 Mar 2014 02:37:04 +0100 Subject: [PATCH 47/60] crypto: hash - Fix the pointer voodoo in unaligned ahash Add documentation for the pointer voodoo that is happening in crypto/ahash.c in ahash_op_unaligned(). This code is quite confusing, so add a beefy chunk of documentation. Moreover, make sure the mangled request is completely restored after finishing this unaligned operation. This means restoring all of .result, .base.data and .base.complete . Also, remove the crypto_completion_t complete = ... line present in the ahash_op_unaligned_done() function. This type actually declares a function pointer, which is very confusing. Finally, yet very important nonetheless, make sure the req->priv is free()'d only after the original request is restored in ahash_op_unaligned_done(). The req->priv data must not be free()'d before that in ahash_op_unaligned_finish(), since we would be accessing previously free()'d data in ahash_op_unaligned_done() and cause corruption. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Fabio Estevam Cc: Herbert Xu Cc: Shawn Guo Cc: Tom Lendacky Signed-off-by: Herbert Xu --- crypto/ahash.c | 56 +++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/crypto/ahash.c b/crypto/ahash.c index a92dc382f781..4fdb4d3fddec 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -201,22 +201,34 @@ static void ahash_op_unaligned_finish(struct ahash_request *req, int err) memcpy(priv->result, req->result, crypto_ahash_digestsize(crypto_ahash_reqtfm(req))); + /* Restore the original crypto request. */ + req->result = priv->result; + req->base.complete = priv->complete; + req->base.data = priv->data; + req->priv = NULL; + + /* Free the req->priv.priv from the ADJUSTED request. */ kzfree(priv); } static void ahash_op_unaligned_done(struct crypto_async_request *req, int err) { struct ahash_request *areq = req->data; - struct ahash_request_priv *priv = areq->priv; - crypto_completion_t complete = priv->complete; - void *data = priv->data; + /* + * Restore the original request, see ahash_op_unaligned() for what + * goes where. + * + * The "struct ahash_request *req" here is in fact the "req.base" + * from the ADJUSTED request from ahash_op_unaligned(), thus as it + * is a pointer to self, it is also the ADJUSTED "req" . + */ + + /* First copy areq->result into areq->priv.result */ ahash_op_unaligned_finish(areq, err); - areq->base.complete = complete; - areq->base.data = data; - - complete(&areq->base, err); + /* Complete the ORIGINAL request. */ + areq->base.complete(&areq->base, err); } static int ahash_op_unaligned(struct ahash_request *req, @@ -234,9 +246,39 @@ static int ahash_op_unaligned(struct ahash_request *req, if (!priv) return -ENOMEM; + /* + * WARNING: Voodoo programming below! + * + * The code below is obscure and hard to understand, thus explanation + * is necessary. See include/crypto/hash.h and include/linux/crypto.h + * to understand the layout of structures used here! + * + * The code here will replace portions of the ORIGINAL request with + * pointers to new code and buffers so the hashing operation can store + * the result in aligned buffer. We will call the modified request + * an ADJUSTED request. + * + * The newly mangled request will look as such: + * + * req { + * .result = ADJUSTED[new aligned buffer] + * .base.complete = ADJUSTED[pointer to completion function] + * .base.data = ADJUSTED[*req (pointer to self)] + * .priv = ADJUSTED[new priv] { + * .result = ORIGINAL(result) + * .complete = ORIGINAL(base.complete) + * .data = ORIGINAL(base.data) + * } + */ + priv->result = req->result; priv->complete = req->base.complete; priv->data = req->base.data; + /* + * WARNING: We do not backup req->priv here! The req->priv + * is for internal use of the Crypto API and the + * user must _NOT_ _EVER_ depend on it's content! + */ req->result = PTR_ALIGN((u8 *)priv->ubuf, alignmask + 1); req->base.complete = ahash_op_unaligned_done; From 1ffc9fbd1e5071948b6d48f9a27d845738ee890f Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 14 Mar 2014 02:37:05 +0100 Subject: [PATCH 48/60] crypto: hash - Pull out the functions to save/restore request The functions to save original request within a newly adjusted request and it's counterpart to restore the original request can be re-used by more code in the crypto/ahash.c file. Pull these functions out from the code so they're available. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Fabio Estevam Cc: Herbert Xu Cc: Shawn Guo Cc: Tom Lendacky Signed-off-by: Herbert Xu --- crypto/ahash.c | 107 ++++++++++++++++++++++++++++--------------------- 1 file changed, 62 insertions(+), 45 deletions(-) diff --git a/crypto/ahash.c b/crypto/ahash.c index 4fdb4d3fddec..fcc6077b9af0 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -190,55 +190,12 @@ static inline unsigned int ahash_align_buffer_size(unsigned len, return len + (mask & ~(crypto_tfm_ctx_alignment() - 1)); } -static void ahash_op_unaligned_finish(struct ahash_request *req, int err) -{ - struct ahash_request_priv *priv = req->priv; - - if (err == -EINPROGRESS) - return; - - if (!err) - memcpy(priv->result, req->result, - crypto_ahash_digestsize(crypto_ahash_reqtfm(req))); - - /* Restore the original crypto request. */ - req->result = priv->result; - req->base.complete = priv->complete; - req->base.data = priv->data; - req->priv = NULL; - - /* Free the req->priv.priv from the ADJUSTED request. */ - kzfree(priv); -} - -static void ahash_op_unaligned_done(struct crypto_async_request *req, int err) -{ - struct ahash_request *areq = req->data; - - /* - * Restore the original request, see ahash_op_unaligned() for what - * goes where. - * - * The "struct ahash_request *req" here is in fact the "req.base" - * from the ADJUSTED request from ahash_op_unaligned(), thus as it - * is a pointer to self, it is also the ADJUSTED "req" . - */ - - /* First copy areq->result into areq->priv.result */ - ahash_op_unaligned_finish(areq, err); - - /* Complete the ORIGINAL request. */ - areq->base.complete(&areq->base, err); -} - -static int ahash_op_unaligned(struct ahash_request *req, - int (*op)(struct ahash_request *)) +static int ahash_save_req(struct ahash_request *req, crypto_completion_t cplt) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); unsigned long alignmask = crypto_ahash_alignmask(tfm); unsigned int ds = crypto_ahash_digestsize(tfm); struct ahash_request_priv *priv; - int err; priv = kmalloc(sizeof(*priv) + ahash_align_buffer_size(ds, alignmask), (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? @@ -281,10 +238,70 @@ static int ahash_op_unaligned(struct ahash_request *req, */ req->result = PTR_ALIGN((u8 *)priv->ubuf, alignmask + 1); - req->base.complete = ahash_op_unaligned_done; + req->base.complete = cplt; req->base.data = req; req->priv = priv; + return 0; +} + +static void ahash_restore_req(struct ahash_request *req) +{ + struct ahash_request_priv *priv = req->priv; + + /* Restore the original crypto request. */ + req->result = priv->result; + req->base.complete = priv->complete; + req->base.data = priv->data; + req->priv = NULL; + + /* Free the req->priv.priv from the ADJUSTED request. */ + kzfree(priv); +} + +static void ahash_op_unaligned_finish(struct ahash_request *req, int err) +{ + struct ahash_request_priv *priv = req->priv; + + if (err == -EINPROGRESS) + return; + + if (!err) + memcpy(priv->result, req->result, + crypto_ahash_digestsize(crypto_ahash_reqtfm(req))); + + ahash_restore_req(req); +} + +static void ahash_op_unaligned_done(struct crypto_async_request *req, int err) +{ + struct ahash_request *areq = req->data; + + /* + * Restore the original request, see ahash_op_unaligned() for what + * goes where. + * + * The "struct ahash_request *req" here is in fact the "req.base" + * from the ADJUSTED request from ahash_op_unaligned(), thus as it + * is a pointer to self, it is also the ADJUSTED "req" . + */ + + /* First copy req->result into req->priv.result */ + ahash_op_unaligned_finish(areq, err); + + /* Complete the ORIGINAL request. */ + areq->base.complete(&areq->base, err); +} + +static int ahash_op_unaligned(struct ahash_request *req, + int (*op)(struct ahash_request *)) +{ + int err; + + err = ahash_save_req(req, ahash_op_unaligned_done); + if (err) + return err; + err = op(req); ahash_op_unaligned_finish(req, err); From d4a7a0fbe959e12bdd071b79b50ed34853a6db8f Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Fri, 14 Mar 2014 02:37:06 +0100 Subject: [PATCH 49/60] crypto: hash - Simplify the ahash_finup implementation The ahash_def_finup() can make use of the request save/restore functions, thus make it so. This simplifies the code a little and unifies the code paths. Note that the same remark about free()ing the req->priv applies here, the req->priv can only be free()'d after the original request was restored. Finally, squash a bug in the invocation of completion in the ASYNC path. In both ahash_def_finup_done{1,2}, the function areq->base.complete(X, err); was called with X=areq->base.data . This is incorrect , as X=&areq->base is the correct value. By analysis of the data structures, we see the areq is of type 'struct ahash_request' , areq->base is of type 'struct crypto_async_request' and areq->base.completion is of type crypto_completion_t, which is defined in include/linux/crypto.h as: typedef void (*crypto_completion_t)(struct crypto_async_request *req, int err); This is one lead that the X should be &areq->base . Next up, we can inspect other code which calls the completion callback to give us kind-of statistical idea of how this callback is used. We can try: $ git grep base\.complete\( drivers/crypto/ Finally, by inspecting ahash_request_set_callback() implementation defined in include/crypto/hash.h , we observe that the .data entry of 'struct crypto_async_request' is intended for arbitrary data, not for completion argument. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Fabio Estevam Cc: Herbert Xu Cc: Shawn Guo Cc: Tom Lendacky Signed-off-by: Herbert Xu --- crypto/ahash.c | 36 +++++++++--------------------------- 1 file changed, 9 insertions(+), 27 deletions(-) diff --git a/crypto/ahash.c b/crypto/ahash.c index fcc6077b9af0..6e7223392e80 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -349,19 +349,16 @@ static void ahash_def_finup_finish2(struct ahash_request *req, int err) memcpy(priv->result, req->result, crypto_ahash_digestsize(crypto_ahash_reqtfm(req))); - kzfree(priv); + ahash_restore_req(req); } static void ahash_def_finup_done2(struct crypto_async_request *req, int err) { struct ahash_request *areq = req->data; - struct ahash_request_priv *priv = areq->priv; - crypto_completion_t complete = priv->complete; - void *data = priv->data; ahash_def_finup_finish2(areq, err); - complete(data, err); + areq->base.complete(&areq->base, err); } static int ahash_def_finup_finish1(struct ahash_request *req, int err) @@ -381,38 +378,23 @@ out: static void ahash_def_finup_done1(struct crypto_async_request *req, int err) { struct ahash_request *areq = req->data; - struct ahash_request_priv *priv = areq->priv; - crypto_completion_t complete = priv->complete; - void *data = priv->data; err = ahash_def_finup_finish1(areq, err); - complete(data, err); + areq->base.complete(&areq->base, err); } static int ahash_def_finup(struct ahash_request *req) { struct crypto_ahash *tfm = crypto_ahash_reqtfm(req); - unsigned long alignmask = crypto_ahash_alignmask(tfm); - unsigned int ds = crypto_ahash_digestsize(tfm); - struct ahash_request_priv *priv; + int err; - priv = kmalloc(sizeof(*priv) + ahash_align_buffer_size(ds, alignmask), - (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? - GFP_KERNEL : GFP_ATOMIC); - if (!priv) - return -ENOMEM; + err = ahash_save_req(req, ahash_def_finup_done1); + if (err) + return err; - priv->result = req->result; - priv->complete = req->base.complete; - priv->data = req->base.data; - - req->result = PTR_ALIGN((u8 *)priv->ubuf, alignmask + 1); - req->base.complete = ahash_def_finup_done1; - req->base.data = req; - req->priv = priv; - - return ahash_def_finup_finish1(req, tfm->update(req)); + err = tfm->update(req); + return ahash_def_finup_finish1(req, err); } static int ahash_no_export(struct ahash_request *req, void *out) From 4464a7d4f53d756101291da26563f37f7fce40f3 Mon Sep 17 00:00:00 2001 From: Horia Geanta Date: Fri, 14 Mar 2014 17:46:49 +0200 Subject: [PATCH 50/60] crypto: caam - remove error propagation handling Commit 61bb86bba169507a5f223b94b9176c32c84b4721 ("crypto: caam - set descriptor sharing type to SERIAL") changed the descriptor sharing mode from SHARE_WAIT to SHARE_SERIAL. All descriptor commands that handle the "ok to share" and "error propagation" settings should also go away, since they have no meaning for SHARE_SERIAL. Signed-off-by: Horia Geanta Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg.c | 54 +++++------------------------------ 1 file changed, 7 insertions(+), 47 deletions(-) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index b71f2fd749df..5016e63b6c25 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -66,8 +66,8 @@ /* length of descriptors text */ #define DESC_AEAD_BASE (4 * CAAM_CMD_SZ) -#define DESC_AEAD_ENC_LEN (DESC_AEAD_BASE + 16 * CAAM_CMD_SZ) -#define DESC_AEAD_DEC_LEN (DESC_AEAD_BASE + 21 * CAAM_CMD_SZ) +#define DESC_AEAD_ENC_LEN (DESC_AEAD_BASE + 15 * CAAM_CMD_SZ) +#define DESC_AEAD_DEC_LEN (DESC_AEAD_BASE + 18 * CAAM_CMD_SZ) #define DESC_AEAD_GIVENC_LEN (DESC_AEAD_ENC_LEN + 7 * CAAM_CMD_SZ) #define DESC_ABLKCIPHER_BASE (3 * CAAM_CMD_SZ) @@ -103,19 +103,6 @@ static inline void append_dec_op1(u32 *desc, u32 type) set_jump_tgt_here(desc, uncond_jump_cmd); } -/* - * Wait for completion of class 1 key loading before allowing - * error propagation - */ -static inline void append_dec_shr_done(u32 *desc) -{ - u32 *jump_cmd; - - jump_cmd = append_jump(desc, JUMP_CLASS_CLASS1 | JUMP_TEST_ALL); - set_jump_tgt_here(desc, jump_cmd); - append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD); -} - /* * For aead functions, read payload and write payload, * both of which are specified in req->src and req->dst @@ -211,9 +198,6 @@ static void init_sh_desc_key_aead(u32 *desc, struct caam_ctx *ctx, append_key_aead(desc, ctx, keys_fit_inline); set_jump_tgt_here(desc, key_jump_cmd); - - /* Propagate errors from shared to job descriptor */ - append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD); } static int aead_set_sh_desc(struct crypto_aead *aead) @@ -222,7 +206,6 @@ static int aead_set_sh_desc(struct crypto_aead *aead) struct caam_ctx *ctx = crypto_aead_ctx(aead); struct device *jrdev = ctx->jrdev; bool keys_fit_inline = false; - u32 *key_jump_cmd, *jump_cmd; u32 geniv, moveiv; u32 *desc; @@ -253,7 +236,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) /* assoclen + cryptlen = seqinlen - ivsize */ append_math_sub_imm_u32(desc, REG2, SEQINLEN, IMM, tfm->ivsize); - /* assoclen + cryptlen = (assoclen + cryptlen) - cryptlen */ + /* assoclen = (assoclen + cryptlen) - cryptlen */ append_math_sub(desc, VARSEQINLEN, REG2, REG3, CAAM_CMD_SZ); /* read assoc before reading payload */ @@ -296,28 +279,16 @@ static int aead_set_sh_desc(struct crypto_aead *aead) CAAM_DESC_BYTES_MAX) keys_fit_inline = true; + /* aead_decrypt shared descriptor */ desc = ctx->sh_desc_dec; - /* aead_decrypt shared descriptor */ - init_sh_desc(desc, HDR_SHARE_SERIAL); - - /* Skip if already shared */ - key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | - JUMP_COND_SHRD); - - append_key_aead(desc, ctx, keys_fit_inline); - - /* Only propagate error immediately if shared */ - jump_cmd = append_jump(desc, JUMP_TEST_ALL); - set_jump_tgt_here(desc, key_jump_cmd); - append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD); - set_jump_tgt_here(desc, jump_cmd); + init_sh_desc_key_aead(desc, ctx, keys_fit_inline); /* Class 2 operation */ append_operation(desc, ctx->class2_alg_type | OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON); - /* assoclen + cryptlen = seqinlen - ivsize */ + /* assoclen + cryptlen = seqinlen - ivsize - authsize */ append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM, ctx->authsize + tfm->ivsize) /* assoclen = (assoclen + cryptlen) - cryptlen */ @@ -340,7 +311,6 @@ static int aead_set_sh_desc(struct crypto_aead *aead) /* Load ICV */ append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_ICV); - append_dec_shr_done(desc); ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, desc_bytes(desc), @@ -532,7 +502,7 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, struct ablkcipher_tfm *tfm = &ablkcipher->base.crt_ablkcipher; struct device *jrdev = ctx->jrdev; int ret = 0; - u32 *key_jump_cmd, *jump_cmd; + u32 *key_jump_cmd; u32 *desc; #ifdef DEBUG @@ -563,9 +533,6 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, set_jump_tgt_here(desc, key_jump_cmd); - /* Propagate errors from shared to job descriptor */ - append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD); - /* Load iv */ append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT | LDST_CLASS_1_CCB | tfm->ivsize); @@ -603,11 +570,7 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, ctx->enckeylen, CLASS_1 | KEY_DEST_CLASS_REG); - /* For aead, only propagate error immediately if shared */ - jump_cmd = append_jump(desc, JUMP_TEST_ALL); set_jump_tgt_here(desc, key_jump_cmd); - append_cmd(desc, SET_OK_NO_PROP_ERRORS | CMD_LOAD); - set_jump_tgt_here(desc, jump_cmd); /* load IV */ append_cmd(desc, CMD_SEQ_LOAD | LDST_SRCDST_BYTE_CONTEXT | @@ -619,9 +582,6 @@ static int ablkcipher_setkey(struct crypto_ablkcipher *ablkcipher, /* Perform operation */ ablkcipher_append_src_dst(desc); - /* Wait for key to load before allowing propagating error */ - append_dec_shr_done(desc); - ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, desc_bytes(desc), DMA_TO_DEVICE); From 72567258f0643eda5d622be16e35fb933aa6146e Mon Sep 17 00:00:00 2001 From: Horia Geanta Date: Fri, 14 Mar 2014 17:46:50 +0200 Subject: [PATCH 51/60] crypto: export NULL algorithms defines These defines might be needed by crypto drivers. Signed-off-by: Horia Geanta Signed-off-by: Herbert Xu --- crypto/crypto_null.c | 6 +----- include/crypto/null.h | 11 +++++++++++ 2 files changed, 12 insertions(+), 5 deletions(-) create mode 100644 include/crypto/null.h diff --git a/crypto/crypto_null.c b/crypto/crypto_null.c index fee7265cd35d..1dc54bb95a87 100644 --- a/crypto/crypto_null.c +++ b/crypto/crypto_null.c @@ -17,6 +17,7 @@ * */ +#include #include #include #include @@ -24,11 +25,6 @@ #include #include -#define NULL_KEY_SIZE 0 -#define NULL_BLOCK_SIZE 1 -#define NULL_DIGEST_SIZE 0 -#define NULL_IV_SIZE 0 - static int null_compress(struct crypto_tfm *tfm, const u8 *src, unsigned int slen, u8 *dst, unsigned int *dlen) { diff --git a/include/crypto/null.h b/include/crypto/null.h new file mode 100644 index 000000000000..b7c864cc70df --- /dev/null +++ b/include/crypto/null.h @@ -0,0 +1,11 @@ +/* Values for NULL algorithms */ + +#ifndef _CRYPTO_NULL_H +#define _CRYPTO_NULL_H + +#define NULL_KEY_SIZE 0 +#define NULL_BLOCK_SIZE 1 +#define NULL_DIGEST_SIZE 0 +#define NULL_IV_SIZE 0 + +#endif From bca4feb0d4fe2d5da1a0f31ef89f63709aba4906 Mon Sep 17 00:00:00 2001 From: Horia Geanta Date: Fri, 14 Mar 2014 17:46:51 +0200 Subject: [PATCH 52/60] crypto: testmgr - add aead null encryption test vectors Add test vectors for aead with null encryption and md5, respectively sha1 authentication. Input data is taken from test vectors listed in RFC2410. Signed-off-by: Horia Geanta Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 8 +++ crypto/testmgr.c | 32 +++++++++ crypto/testmgr.h | 180 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 220 insertions(+) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 0d9003ae8c61..870be7b4dc05 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -1511,6 +1511,14 @@ static int do_test(int m) ret += tcrypt_test("authenc(hmac(sha1),cbc(aes))"); break; + case 156: + ret += tcrypt_test("authenc(hmac(md5),ecb(cipher_null))"); + break; + + case 157: + ret += tcrypt_test("authenc(hmac(sha1),ecb(cipher_null))"); + break; + case 200: test_cipher_speed("ecb(aes)", ENCRYPT, sec, NULL, 0, speed_template_16_24_32); diff --git a/crypto/testmgr.c b/crypto/testmgr.c index 77955507f6f1..dc3cf3535ef0 100644 --- a/crypto/testmgr.c +++ b/crypto/testmgr.c @@ -1808,6 +1808,22 @@ static const struct alg_test_desc alg_test_descs[] = { .count = ANSI_CPRNG_AES_TEST_VECTORS } } + }, { + .alg = "authenc(hmac(md5),ecb(cipher_null))", + .test = alg_test_aead, + .fips_allowed = 1, + .suite = { + .aead = { + .enc = { + .vecs = hmac_md5_ecb_cipher_null_enc_tv_template, + .count = HMAC_MD5_ECB_CIPHER_NULL_ENC_TEST_VECTORS + }, + .dec = { + .vecs = hmac_md5_ecb_cipher_null_dec_tv_template, + .count = HMAC_MD5_ECB_CIPHER_NULL_DEC_TEST_VECTORS + } + } + } }, { .alg = "authenc(hmac(sha1),cbc(aes))", .test = alg_test_aead, @@ -1820,6 +1836,22 @@ static const struct alg_test_desc alg_test_descs[] = { } } } + }, { + .alg = "authenc(hmac(sha1),ecb(cipher_null))", + .test = alg_test_aead, + .fips_allowed = 1, + .suite = { + .aead = { + .enc = { + .vecs = hmac_sha1_ecb_cipher_null_enc_tv_template, + .count = HMAC_SHA1_ECB_CIPHER_NULL_ENC_TEST_VECTORS + }, + .dec = { + .vecs = hmac_sha1_ecb_cipher_null_dec_tv_template, + .count = HMAC_SHA1_ECB_CIPHER_NULL_DEC_TEST_VECTORS + } + } + } }, { .alg = "authenc(hmac(sha256),cbc(aes))", .test = alg_test_aead, diff --git a/crypto/testmgr.h b/crypto/testmgr.h index 7d44aa3d6b44..3db83dbba1d9 100644 --- a/crypto/testmgr.h +++ b/crypto/testmgr.h @@ -12821,6 +12821,10 @@ static struct cipher_testvec cast6_xts_dec_tv_template[] = { #define AES_DEC_TEST_VECTORS 4 #define AES_CBC_ENC_TEST_VECTORS 5 #define AES_CBC_DEC_TEST_VECTORS 5 +#define HMAC_MD5_ECB_CIPHER_NULL_ENC_TEST_VECTORS 2 +#define HMAC_MD5_ECB_CIPHER_NULL_DEC_TEST_VECTORS 2 +#define HMAC_SHA1_ECB_CIPHER_NULL_ENC_TEST_VECTORS 2 +#define HMAC_SHA1_ECB_CIPHER_NULL_DEC_TEST_VECTORS 2 #define HMAC_SHA1_AES_CBC_ENC_TEST_VECTORS 7 #define HMAC_SHA256_AES_CBC_ENC_TEST_VECTORS 7 #define HMAC_SHA512_AES_CBC_ENC_TEST_VECTORS 7 @@ -13627,6 +13631,90 @@ static struct cipher_testvec aes_cbc_dec_tv_template[] = { }, }; +static struct aead_testvec hmac_md5_ecb_cipher_null_enc_tv_template[] = { + { /* Input data from RFC 2410 Case 1 */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x00" /* enc key length */ + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .klen = 8 + 16 + 0, + .iv = "", + .input = "\x01\x23\x45\x67\x89\xab\xcd\xef", + .ilen = 8, + .result = "\x01\x23\x45\x67\x89\xab\xcd\xef" + "\xaa\x42\xfe\x43\x8d\xea\xa3\x5a" + "\xb9\x3d\x9f\xb1\xa3\x8e\x9b\xae", + .rlen = 8 + 16, + }, { /* Input data from RFC 2410 Case 2 */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x00" /* enc key length */ + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .klen = 8 + 16 + 0, + .iv = "", + .input = "Network Security People Have A Strange Sense Of Humor", + .ilen = 53, + .result = "Network Security People Have A Strange Sense Of Humor" + "\x73\xa5\x3e\x1c\x08\x0e\x8a\x8a" + "\x8e\xb5\x5f\x90\x8e\xfe\x13\x23", + .rlen = 53 + 16, + }, +}; + +static struct aead_testvec hmac_md5_ecb_cipher_null_dec_tv_template[] = { + { +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x00" /* enc key length */ + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .klen = 8 + 16 + 0, + .iv = "", + .input = "\x01\x23\x45\x67\x89\xab\xcd\xef" + "\xaa\x42\xfe\x43\x8d\xea\xa3\x5a" + "\xb9\x3d\x9f\xb1\xa3\x8e\x9b\xae", + .ilen = 8 + 16, + .result = "\x01\x23\x45\x67\x89\xab\xcd\xef", + .rlen = 8, + }, { +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x00" /* enc key length */ + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00", + .klen = 8 + 16 + 0, + .iv = "", + .input = "Network Security People Have A Strange Sense Of Humor" + "\x73\xa5\x3e\x1c\x08\x0e\x8a\x8a" + "\x8e\xb5\x5f\x90\x8e\xfe\x13\x23", + .ilen = 53 + 16, + .result = "Network Security People Have A Strange Sense Of Humor", + .rlen = 53, + }, +}; + static struct aead_testvec hmac_sha1_aes_cbc_enc_tv_template[] = { { /* RFC 3602 Case 1 */ #ifdef __LITTLE_ENDIAN @@ -13876,6 +13964,98 @@ static struct aead_testvec hmac_sha1_aes_cbc_enc_tv_template[] = { }, }; +static struct aead_testvec hmac_sha1_ecb_cipher_null_enc_tv_template[] = { + { /* Input data from RFC 2410 Case 1 */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x00" /* enc key length */ + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00", + .klen = 8 + 20 + 0, + .iv = "", + .input = "\x01\x23\x45\x67\x89\xab\xcd\xef", + .ilen = 8, + .result = "\x01\x23\x45\x67\x89\xab\xcd\xef" + "\x40\xc3\x0a\xa1\xc9\xa0\x28\xab" + "\x99\x5e\x19\x04\xd1\x72\xef\xb8" + "\x8c\x5e\xe4\x08", + .rlen = 8 + 20, + }, { /* Input data from RFC 2410 Case 2 */ +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x00" /* enc key length */ + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00", + .klen = 8 + 20 + 0, + .iv = "", + .input = "Network Security People Have A Strange Sense Of Humor", + .ilen = 53, + .result = "Network Security People Have A Strange Sense Of Humor" + "\x75\x6f\x42\x1e\xf8\x50\x21\xd2" + "\x65\x47\xee\x8e\x1a\xef\x16\xf6" + "\x91\x56\xe4\xd6", + .rlen = 53 + 20, + }, +}; + +static struct aead_testvec hmac_sha1_ecb_cipher_null_dec_tv_template[] = { + { +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x00" /* enc key length */ + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00", + .klen = 8 + 20 + 0, + .iv = "", + .input = "\x01\x23\x45\x67\x89\xab\xcd\xef" + "\x40\xc3\x0a\xa1\xc9\xa0\x28\xab" + "\x99\x5e\x19\x04\xd1\x72\xef\xb8" + "\x8c\x5e\xe4\x08", + .ilen = 8 + 20, + .result = "\x01\x23\x45\x67\x89\xab\xcd\xef", + .rlen = 8, + }, { +#ifdef __LITTLE_ENDIAN + .key = "\x08\x00" /* rta length */ + "\x01\x00" /* rta type */ +#else + .key = "\x00\x08" /* rta length */ + "\x00\x01" /* rta type */ +#endif + "\x00\x00\x00\x00" /* enc key length */ + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00\x00\x00\x00\x00" + "\x00\x00\x00\x00", + .klen = 8 + 20 + 0, + .iv = "", + .input = "Network Security People Have A Strange Sense Of Humor" + "\x75\x6f\x42\x1e\xf8\x50\x21\xd2" + "\x65\x47\xee\x8e\x1a\xef\x16\xf6" + "\x91\x56\xe4\xd6", + .ilen = 53 + 20, + .result = "Network Security People Have A Strange Sense Of Humor", + .rlen = 53, + }, +}; + static struct aead_testvec hmac_sha256_aes_cbc_enc_tv_template[] = { { /* RFC 3602 Case 1 */ #ifdef __LITTLE_ENDIAN From ae4a825ffdd6fd769af2667e03070940af1b8368 Mon Sep 17 00:00:00 2001 From: Horia Geanta Date: Fri, 14 Mar 2014 17:46:52 +0200 Subject: [PATCH 53/60] crypto: caam - add support for aead null encryption Add support for the following combinations: -encryption: null -authentication: md5, sha* (1, 224, 256, 384, 512) Signed-off-by: Tudor Ambarus Signed-off-by: Horia Geanta Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg.c | 327 +++++++++++++++++++++++++++++- drivers/crypto/caam/compat.h | 1 + drivers/crypto/caam/desc_constr.h | 27 ++- 3 files changed, 342 insertions(+), 13 deletions(-) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index 5016e63b6c25..a9ba8b159636 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -70,6 +70,10 @@ #define DESC_AEAD_DEC_LEN (DESC_AEAD_BASE + 18 * CAAM_CMD_SZ) #define DESC_AEAD_GIVENC_LEN (DESC_AEAD_ENC_LEN + 7 * CAAM_CMD_SZ) +#define DESC_AEAD_NULL_BASE (3 * CAAM_CMD_SZ) +#define DESC_AEAD_NULL_ENC_LEN (DESC_AEAD_NULL_BASE + 14 * CAAM_CMD_SZ) +#define DESC_AEAD_NULL_DEC_LEN (DESC_AEAD_NULL_BASE + 17 * CAAM_CMD_SZ) + #define DESC_ABLKCIPHER_BASE (3 * CAAM_CMD_SZ) #define DESC_ABLKCIPHER_ENC_LEN (DESC_ABLKCIPHER_BASE + \ 20 * CAAM_CMD_SZ) @@ -109,9 +113,9 @@ static inline void append_dec_op1(u32 *desc, u32 type) */ static inline void aead_append_src_dst(u32 *desc, u32 msg_type) { + append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF); append_seq_fifo_load(desc, 0, FIFOLD_CLASS_BOTH | KEY_VLF | msg_type | FIFOLD_TYPE_LASTBOTH); - append_seq_fifo_store(desc, 0, FIFOST_TYPE_MESSAGE_DATA | KEY_VLF); } /* @@ -200,6 +204,196 @@ static void init_sh_desc_key_aead(u32 *desc, struct caam_ctx *ctx, set_jump_tgt_here(desc, key_jump_cmd); } +static int aead_null_set_sh_desc(struct crypto_aead *aead) +{ + struct aead_tfm *tfm = &aead->base.crt_aead; + struct caam_ctx *ctx = crypto_aead_ctx(aead); + struct device *jrdev = ctx->jrdev; + bool keys_fit_inline = false; + u32 *key_jump_cmd, *jump_cmd, *read_move_cmd, *write_move_cmd; + u32 *desc; + + /* + * Job Descriptor and Shared Descriptors + * must all fit into the 64-word Descriptor h/w Buffer + */ + if (DESC_AEAD_NULL_ENC_LEN + DESC_JOB_IO_LEN + + ctx->split_key_pad_len <= CAAM_DESC_BYTES_MAX) + keys_fit_inline = true; + + /* aead_encrypt shared descriptor */ + desc = ctx->sh_desc_enc; + + init_sh_desc(desc, HDR_SHARE_SERIAL); + + /* Skip if already shared */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + if (keys_fit_inline) + append_key_as_imm(desc, ctx->key, ctx->split_key_pad_len, + ctx->split_key_len, CLASS_2 | + KEY_DEST_MDHA_SPLIT | KEY_ENC); + else + append_key(desc, ctx->key_dma, ctx->split_key_len, CLASS_2 | + KEY_DEST_MDHA_SPLIT | KEY_ENC); + set_jump_tgt_here(desc, key_jump_cmd); + + /* cryptlen = seqoutlen - authsize */ + append_math_sub_imm_u32(desc, REG3, SEQOUTLEN, IMM, ctx->authsize); + + /* + * NULL encryption; IV is zero + * assoclen = (assoclen + cryptlen) - cryptlen + */ + append_math_sub(desc, VARSEQINLEN, SEQINLEN, REG3, CAAM_CMD_SZ); + + /* read assoc before reading payload */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | + KEY_VLF); + + /* Prepare to read and write cryptlen bytes */ + append_math_add(desc, VARSEQINLEN, ZERO, REG3, CAAM_CMD_SZ); + append_math_add(desc, VARSEQOUTLEN, ZERO, REG3, CAAM_CMD_SZ); + + /* + * MOVE_LEN opcode is not available in all SEC HW revisions, + * thus need to do some magic, i.e. self-patch the descriptor + * buffer. + */ + read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | + MOVE_DEST_MATH3 | + (0x6 << MOVE_LEN_SHIFT)); + write_move_cmd = append_move(desc, MOVE_SRC_MATH3 | + MOVE_DEST_DESCBUF | + MOVE_WAITCOMP | + (0x8 << MOVE_LEN_SHIFT)); + + /* Class 2 operation */ + append_operation(desc, ctx->class2_alg_type | + OP_ALG_AS_INITFINAL | OP_ALG_ENCRYPT); + + /* Read and write cryptlen bytes */ + aead_append_src_dst(desc, FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1); + + set_move_tgt_here(desc, read_move_cmd); + set_move_tgt_here(desc, write_move_cmd); + append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); + append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO | + MOVE_AUX_LS); + + /* Write ICV */ + append_seq_store(desc, ctx->authsize, LDST_CLASS_2_CCB | + LDST_SRCDST_BYTE_CONTEXT); + + ctx->sh_desc_enc_dma = dma_map_single(jrdev, desc, + desc_bytes(desc), + DMA_TO_DEVICE); + if (dma_mapping_error(jrdev, ctx->sh_desc_enc_dma)) { + dev_err(jrdev, "unable to map shared descriptor\n"); + return -ENOMEM; + } +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "aead null enc shdesc@"__stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, + desc_bytes(desc), 1); +#endif + + /* + * Job Descriptor and Shared Descriptors + * must all fit into the 64-word Descriptor h/w Buffer + */ + if (DESC_AEAD_NULL_DEC_LEN + DESC_JOB_IO_LEN + + ctx->split_key_pad_len <= CAAM_DESC_BYTES_MAX) + keys_fit_inline = true; + + desc = ctx->sh_desc_dec; + + /* aead_decrypt shared descriptor */ + init_sh_desc(desc, HDR_SHARE_SERIAL); + + /* Skip if already shared */ + key_jump_cmd = append_jump(desc, JUMP_JSL | JUMP_TEST_ALL | + JUMP_COND_SHRD); + if (keys_fit_inline) + append_key_as_imm(desc, ctx->key, ctx->split_key_pad_len, + ctx->split_key_len, CLASS_2 | + KEY_DEST_MDHA_SPLIT | KEY_ENC); + else + append_key(desc, ctx->key_dma, ctx->split_key_len, CLASS_2 | + KEY_DEST_MDHA_SPLIT | KEY_ENC); + set_jump_tgt_here(desc, key_jump_cmd); + + /* Class 2 operation */ + append_operation(desc, ctx->class2_alg_type | + OP_ALG_AS_INITFINAL | OP_ALG_DECRYPT | OP_ALG_ICV_ON); + + /* assoclen + cryptlen = seqinlen - ivsize - authsize */ + append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM, + ctx->authsize + tfm->ivsize); + /* assoclen = (assoclen + cryptlen) - cryptlen */ + append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ); + append_math_sub(desc, VARSEQINLEN, REG3, REG2, CAAM_CMD_SZ); + + /* read assoc before reading payload */ + append_seq_fifo_load(desc, 0, FIFOLD_CLASS_CLASS2 | FIFOLD_TYPE_MSG | + KEY_VLF); + + /* Prepare to read and write cryptlen bytes */ + append_math_add(desc, VARSEQINLEN, ZERO, REG2, CAAM_CMD_SZ); + append_math_add(desc, VARSEQOUTLEN, ZERO, REG2, CAAM_CMD_SZ); + + /* + * MOVE_LEN opcode is not available in all SEC HW revisions, + * thus need to do some magic, i.e. self-patch the descriptor + * buffer. + */ + read_move_cmd = append_move(desc, MOVE_SRC_DESCBUF | + MOVE_DEST_MATH2 | + (0x6 << MOVE_LEN_SHIFT)); + write_move_cmd = append_move(desc, MOVE_SRC_MATH2 | + MOVE_DEST_DESCBUF | + MOVE_WAITCOMP | + (0x8 << MOVE_LEN_SHIFT)); + + /* Read and write cryptlen bytes */ + aead_append_src_dst(desc, FIFOLD_TYPE_MSG | FIFOLD_TYPE_FLUSH1); + + /* + * Insert a NOP here, since we need at least 4 instructions between + * code patching the descriptor buffer and the location being patched. + */ + jump_cmd = append_jump(desc, JUMP_TEST_ALL); + set_jump_tgt_here(desc, jump_cmd); + + set_move_tgt_here(desc, read_move_cmd); + set_move_tgt_here(desc, write_move_cmd); + append_cmd(desc, CMD_LOAD | DISABLE_AUTO_INFO_FIFO); + append_move(desc, MOVE_SRC_INFIFO_CL | MOVE_DEST_OUTFIFO | + MOVE_AUX_LS); + append_cmd(desc, CMD_LOAD | ENABLE_AUTO_INFO_FIFO); + + /* Load ICV */ + append_seq_fifo_load(desc, ctx->authsize, FIFOLD_CLASS_CLASS2 | + FIFOLD_TYPE_LAST2 | FIFOLD_TYPE_ICV); + + ctx->sh_desc_dec_dma = dma_map_single(jrdev, desc, + desc_bytes(desc), + DMA_TO_DEVICE); + if (dma_mapping_error(jrdev, ctx->sh_desc_dec_dma)) { + dev_err(jrdev, "unable to map shared descriptor\n"); + return -ENOMEM; + } +#ifdef DEBUG + print_hex_dump(KERN_ERR, + "aead null dec shdesc@"__stringify(__LINE__)": ", + DUMP_PREFIX_ADDRESS, 16, 4, desc, + desc_bytes(desc), 1); +#endif + + return 0; +} + static int aead_set_sh_desc(struct crypto_aead *aead) { struct aead_tfm *tfm = &aead->base.crt_aead; @@ -209,9 +403,13 @@ static int aead_set_sh_desc(struct crypto_aead *aead) u32 geniv, moveiv; u32 *desc; - if (!ctx->enckeylen || !ctx->authsize) + if (!ctx->authsize) return 0; + /* NULL encryption / decryption */ + if (!ctx->enckeylen) + return aead_null_set_sh_desc(aead); + /* * Job Descriptor and Shared Descriptors * must all fit into the 64-word Descriptor h/w Buffer @@ -290,7 +488,7 @@ static int aead_set_sh_desc(struct crypto_aead *aead) /* assoclen + cryptlen = seqinlen - ivsize - authsize */ append_math_sub_imm_u32(desc, REG3, SEQINLEN, IMM, - ctx->authsize + tfm->ivsize) + ctx->authsize + tfm->ivsize); /* assoclen = (assoclen + cryptlen) - cryptlen */ append_math_sub(desc, REG2, SEQOUTLEN, REG0, CAAM_CMD_SZ); append_math_sub(desc, VARSEQINLEN, REG3, REG2, CAAM_CMD_SZ); @@ -1419,6 +1617,11 @@ static int aead_givencrypt(struct aead_givcrypt_request *areq) return ret; } +static int aead_null_givencrypt(struct aead_givcrypt_request *areq) +{ + return aead_encrypt(&areq->areq); +} + /* * allocate and map the ablkcipher extended descriptor for ablkcipher */ @@ -1607,6 +1810,124 @@ struct caam_alg_template { static struct caam_alg_template driver_algs[] = { /* single-pass ipsec_esp descriptor */ + { + .name = "authenc(hmac(md5),ecb(cipher_null))", + .driver_name = "authenc-hmac-md5-ecb-cipher_null-caam", + .blocksize = NULL_BLOCK_SIZE, + .type = CRYPTO_ALG_TYPE_AEAD, + .template_aead = { + .setkey = aead_setkey, + .setauthsize = aead_setauthsize, + .encrypt = aead_encrypt, + .decrypt = aead_decrypt, + .givencrypt = aead_null_givencrypt, + .geniv = "", + .ivsize = NULL_IV_SIZE, + .maxauthsize = MD5_DIGEST_SIZE, + }, + .class1_alg_type = 0, + .class2_alg_type = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC_PRECOMP, + .alg_op = OP_ALG_ALGSEL_MD5 | OP_ALG_AAI_HMAC, + }, + { + .name = "authenc(hmac(sha1),ecb(cipher_null))", + .driver_name = "authenc-hmac-sha1-ecb-cipher_null-caam", + .blocksize = NULL_BLOCK_SIZE, + .type = CRYPTO_ALG_TYPE_AEAD, + .template_aead = { + .setkey = aead_setkey, + .setauthsize = aead_setauthsize, + .encrypt = aead_encrypt, + .decrypt = aead_decrypt, + .givencrypt = aead_null_givencrypt, + .geniv = "", + .ivsize = NULL_IV_SIZE, + .maxauthsize = SHA1_DIGEST_SIZE, + }, + .class1_alg_type = 0, + .class2_alg_type = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC_PRECOMP, + .alg_op = OP_ALG_ALGSEL_SHA1 | OP_ALG_AAI_HMAC, + }, + { + .name = "authenc(hmac(sha224),ecb(cipher_null))", + .driver_name = "authenc-hmac-sha224-ecb-cipher_null-caam", + .blocksize = NULL_BLOCK_SIZE, + .type = CRYPTO_ALG_TYPE_AEAD, + .template_aead = { + .setkey = aead_setkey, + .setauthsize = aead_setauthsize, + .encrypt = aead_encrypt, + .decrypt = aead_decrypt, + .givencrypt = aead_null_givencrypt, + .geniv = "", + .ivsize = NULL_IV_SIZE, + .maxauthsize = SHA224_DIGEST_SIZE, + }, + .class1_alg_type = 0, + .class2_alg_type = OP_ALG_ALGSEL_SHA224 | + OP_ALG_AAI_HMAC_PRECOMP, + .alg_op = OP_ALG_ALGSEL_SHA224 | OP_ALG_AAI_HMAC, + }, + { + .name = "authenc(hmac(sha256),ecb(cipher_null))", + .driver_name = "authenc-hmac-sha256-ecb-cipher_null-caam", + .blocksize = NULL_BLOCK_SIZE, + .type = CRYPTO_ALG_TYPE_AEAD, + .template_aead = { + .setkey = aead_setkey, + .setauthsize = aead_setauthsize, + .encrypt = aead_encrypt, + .decrypt = aead_decrypt, + .givencrypt = aead_null_givencrypt, + .geniv = "", + .ivsize = NULL_IV_SIZE, + .maxauthsize = SHA256_DIGEST_SIZE, + }, + .class1_alg_type = 0, + .class2_alg_type = OP_ALG_ALGSEL_SHA256 | + OP_ALG_AAI_HMAC_PRECOMP, + .alg_op = OP_ALG_ALGSEL_SHA256 | OP_ALG_AAI_HMAC, + }, + { + .name = "authenc(hmac(sha384),ecb(cipher_null))", + .driver_name = "authenc-hmac-sha384-ecb-cipher_null-caam", + .blocksize = NULL_BLOCK_SIZE, + .type = CRYPTO_ALG_TYPE_AEAD, + .template_aead = { + .setkey = aead_setkey, + .setauthsize = aead_setauthsize, + .encrypt = aead_encrypt, + .decrypt = aead_decrypt, + .givencrypt = aead_null_givencrypt, + .geniv = "", + .ivsize = NULL_IV_SIZE, + .maxauthsize = SHA384_DIGEST_SIZE, + }, + .class1_alg_type = 0, + .class2_alg_type = OP_ALG_ALGSEL_SHA384 | + OP_ALG_AAI_HMAC_PRECOMP, + .alg_op = OP_ALG_ALGSEL_SHA384 | OP_ALG_AAI_HMAC, + }, + { + .name = "authenc(hmac(sha512),ecb(cipher_null))", + .driver_name = "authenc-hmac-sha512-ecb-cipher_null-caam", + .blocksize = NULL_BLOCK_SIZE, + .type = CRYPTO_ALG_TYPE_AEAD, + .template_aead = { + .setkey = aead_setkey, + .setauthsize = aead_setauthsize, + .encrypt = aead_encrypt, + .decrypt = aead_decrypt, + .givencrypt = aead_null_givencrypt, + .geniv = "", + .ivsize = NULL_IV_SIZE, + .maxauthsize = SHA512_DIGEST_SIZE, + }, + .class1_alg_type = 0, + .class2_alg_type = OP_ALG_ALGSEL_SHA512 | + OP_ALG_AAI_HMAC_PRECOMP, + .alg_op = OP_ALG_ALGSEL_SHA512 | OP_ALG_AAI_HMAC, + }, { .name = "authenc(hmac(md5),cbc(aes))", .driver_name = "authenc-hmac-md5-cbc-aes-caam", diff --git a/drivers/crypto/caam/compat.h b/drivers/crypto/caam/compat.h index 762aeff626ac..f227922cea38 100644 --- a/drivers/crypto/caam/compat.h +++ b/drivers/crypto/caam/compat.h @@ -26,6 +26,7 @@ #include #include +#include #include #include #include diff --git a/drivers/crypto/caam/desc_constr.h b/drivers/crypto/caam/desc_constr.h index cd5f678847ce..7eec20bb3849 100644 --- a/drivers/crypto/caam/desc_constr.h +++ b/drivers/crypto/caam/desc_constr.h @@ -155,21 +155,29 @@ static inline void append_cmd_data(u32 *desc, void *data, int len, append_data(desc, data, len); } -static inline u32 *append_jump(u32 *desc, u32 options) -{ - u32 *cmd = desc_end(desc); - - PRINT_POS; - append_cmd(desc, CMD_JUMP | options); - - return cmd; +#define APPEND_CMD_RET(cmd, op) \ +static inline u32 *append_##cmd(u32 *desc, u32 options) \ +{ \ + u32 *cmd = desc_end(desc); \ + PRINT_POS; \ + append_cmd(desc, CMD_##op | options); \ + return cmd; \ } +APPEND_CMD_RET(jump, JUMP) +APPEND_CMD_RET(move, MOVE) static inline void set_jump_tgt_here(u32 *desc, u32 *jump_cmd) { *jump_cmd = *jump_cmd | (desc_len(desc) - (jump_cmd - desc)); } +static inline void set_move_tgt_here(u32 *desc, u32 *move_cmd) +{ + *move_cmd &= ~MOVE_OFFSET_MASK; + *move_cmd = *move_cmd | ((desc_len(desc) << (MOVE_OFFSET_SHIFT + 2)) & + MOVE_OFFSET_MASK); +} + #define APPEND_CMD(cmd, op) \ static inline void append_##cmd(u32 *desc, u32 options) \ { \ @@ -177,7 +185,6 @@ static inline void append_##cmd(u32 *desc, u32 options) \ append_cmd(desc, CMD_##op | options); \ } APPEND_CMD(operation, OPERATION) -APPEND_CMD(move, MOVE) #define APPEND_CMD_LEN(cmd, op) \ static inline void append_##cmd(u32 *desc, unsigned int len, u32 options) \ @@ -328,7 +335,7 @@ append_cmd(desc, CMD_MATH | MATH_FUN_##op | MATH_DEST_##dest | \ do { \ APPEND_MATH(op, desc, dest, src_0, src_1, CAAM_CMD_SZ); \ append_cmd(desc, data); \ -} while (0); +} while (0) #define append_math_add_imm_u32(desc, dest, src0, src1, data) \ APPEND_MATH_IMM_u32(ADD, desc, dest, src0, src1, data) From ec31eed754e2fd43938e72ef9e1b60aa55ccb417 Mon Sep 17 00:00:00 2001 From: Horia Geanta Date: Fri, 14 Mar 2014 17:48:30 +0200 Subject: [PATCH 54/60] crypto: caam - add missing key_dma unmap (struct caam_ctx) ctx->key_dma needs to be unmapped when context is cleaned up. Signed-off-by: Horia Geanta Signed-off-by: Herbert Xu --- drivers/crypto/caam/caamalg.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/crypto/caam/caamalg.c b/drivers/crypto/caam/caamalg.c index a9ba8b159636..5f891254db73 100644 --- a/drivers/crypto/caam/caamalg.c +++ b/drivers/crypto/caam/caamalg.c @@ -2380,6 +2380,11 @@ static void caam_cra_exit(struct crypto_tfm *tfm) dma_unmap_single(ctx->jrdev, ctx->sh_desc_givenc_dma, desc_bytes(ctx->sh_desc_givenc), DMA_TO_DEVICE); + if (ctx->key_dma && + !dma_mapping_error(ctx->jrdev, ctx->key_dma)) + dma_unmap_single(ctx->jrdev, ctx->key_dma, + ctx->enckeylen + ctx->split_key_pad_len, + DMA_TO_DEVICE); caam_jr_free(ctx->jrdev); } From 130fa5bc81b44b6cc1fbdea3abf6db0da22964e0 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Mon, 17 Mar 2014 16:52:26 -0700 Subject: [PATCH 55/60] crypto: crypto_wq - Fix late crypto work queue initialization The crypto algorithm modules utilizing the crypto daemon could be used early when the system start up. Using module_init does not guarantee that the daemon's work queue is initialized when the cypto alorithm depending on crypto_wq starts. It is necessary to initialize the crypto work queue earlier at the subsystem init time to make sure that it is initialized when used. Signed-off-by: Tim Chen Signed-off-by: Herbert Xu --- crypto/crypto_wq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crypto/crypto_wq.c b/crypto/crypto_wq.c index adad92a44ba2..2f1b8d12952a 100644 --- a/crypto/crypto_wq.c +++ b/crypto/crypto_wq.c @@ -33,7 +33,7 @@ static void __exit crypto_wq_exit(void) destroy_workqueue(kcrypto_wq); } -module_init(crypto_wq_init); +subsys_initcall(crypto_wq_init); module_exit(crypto_wq_exit); MODULE_LICENSE("GPL"); From 7c1da8d0d046174a4188b5729d7579abf3d29427 Mon Sep 17 00:00:00 2001 From: chandramouli narayanan Date: Thu, 20 Mar 2014 15:14:00 -0700 Subject: [PATCH 56/60] crypto: sha - SHA1 transform x86_64 AVX2 This git patch adds x86_64 AVX2 optimization of SHA1 transform to crypto support. The patch has been tested with 3.14.0-rc1 kernel. On a Haswell desktop, with turbo disabled and all cpus running at maximum frequency, tcrypt shows AVX2 performance improvement from 3% for 256 bytes update to 16% for 1024 bytes update over AVX implementation. This patch adds sha1_avx2_transform(), the glue, build and configuration changes needed for AVX2 optimization of SHA1 transform to crypto support. sha1-ssse3 is one module which adds the necessary optimization support (SSSE3/AVX/AVX2) for the low-level SHA1 transform function. With better optimization support, transform function is overridden as the case may be. In the case of AVX2, due to performance reasons across datablock sizes, the AVX or AVX2 transform function is used at run-time as it suits best. The Makefile change therefore appends the necessary objects to the linkage. Due to this, the patch merely appends AVX2 transform to the existing build mix and Kconfig support and leaves the configuration build support as is. Signed-off-by: Chandramouli Narayanan Reviewed-by: Marek Vasut Acked-by: H. Peter Anvin Signed-off-by: Herbert Xu --- arch/x86/crypto/Makefile | 3 + arch/x86/crypto/sha1_avx2_x86_64_asm.S | 712 +++++++++++++++++++++++++ arch/x86/crypto/sha1_ssse3_glue.c | 49 +- crypto/Kconfig | 4 +- 4 files changed, 759 insertions(+), 9 deletions(-) create mode 100644 arch/x86/crypto/sha1_avx2_x86_64_asm.S diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 6ba54d640383..61d6e281898b 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -79,6 +79,9 @@ aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o aesni-intel-$(CONFIG_64BIT) += aesni-intel_avx-x86_64.o ghash-clmulni-intel-y := ghash-clmulni-intel_asm.o ghash-clmulni-intel_glue.o sha1-ssse3-y := sha1_ssse3_asm.o sha1_ssse3_glue.o +ifeq ($(avx2_supported),yes) +sha1-ssse3-y += sha1_avx2_x86_64_asm.o +endif crc32c-intel-y := crc32c-intel_glue.o crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S new file mode 100644 index 000000000000..4f348544d132 --- /dev/null +++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S @@ -0,0 +1,712 @@ +/* + * Implement fast SHA-1 with AVX2 instructions. (x86_64) + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Ilya Albrekht + * Maxim Locktyukhin + * Ronen Zohar + * Chandramouli Narayanan + * + * BSD LICENSE + * + * Copyright(c) 2014 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/* + * SHA-1 implementation with Intel(R) AVX2 instruction set extensions. + * + *This implementation is based on the previous SSSE3 release: + *Visit http://software.intel.com/en-us/articles/ + *and refer to improving-the-performance-of-the-secure-hash-algorithm-1/ + * + *Updates 20-byte SHA-1 record in 'hash' for even number of + *'num_blocks' consecutive 64-byte blocks + * + *extern "C" void sha1_transform_avx2( + * int *hash, const char* input, size_t num_blocks ); + */ + +#include + +#define CTX %rdi /* arg1 */ +#define BUF %rsi /* arg2 */ +#define CNT %rdx /* arg3 */ + +#define REG_A %ecx +#define REG_B %esi +#define REG_C %edi +#define REG_D %eax +#define REG_E %edx +#define REG_TB %ebx +#define REG_TA %r12d +#define REG_RA %rcx +#define REG_RB %rsi +#define REG_RC %rdi +#define REG_RD %rax +#define REG_RE %rdx +#define REG_RTA %r12 +#define REG_RTB %rbx +#define REG_T1 %ebp +#define xmm_mov vmovups +#define avx2_zeroupper vzeroupper +#define RND_F1 1 +#define RND_F2 2 +#define RND_F3 3 + +.macro REGALLOC + .set A, REG_A + .set B, REG_B + .set C, REG_C + .set D, REG_D + .set E, REG_E + .set TB, REG_TB + .set TA, REG_TA + + .set RA, REG_RA + .set RB, REG_RB + .set RC, REG_RC + .set RD, REG_RD + .set RE, REG_RE + + .set RTA, REG_RTA + .set RTB, REG_RTB + + .set T1, REG_T1 +.endm + +#define K_BASE %r8 +#define HASH_PTR %r9 +#define BUFFER_PTR %r10 +#define BUFFER_PTR2 %r13 +#define BUFFER_END %r11 + +#define PRECALC_BUF %r14 +#define WK_BUF %r15 + +#define W_TMP %xmm0 +#define WY_TMP %ymm0 +#define WY_TMP2 %ymm9 + +# AVX2 variables +#define WY0 %ymm3 +#define WY4 %ymm5 +#define WY08 %ymm7 +#define WY12 %ymm8 +#define WY16 %ymm12 +#define WY20 %ymm13 +#define WY24 %ymm14 +#define WY28 %ymm15 + +#define YMM_SHUFB_BSWAP %ymm10 + +/* + * Keep 2 iterations precalculated at a time: + * - 80 DWORDs per iteration * 2 + */ +#define W_SIZE (80*2*2 +16) + +#define WK(t) ((((t) % 80) / 4)*32 + ( (t) % 4)*4 + ((t)/80)*16 )(WK_BUF) +#define PRECALC_WK(t) ((t)*2*2)(PRECALC_BUF) + + +.macro UPDATE_HASH hash, val + add \hash, \val + mov \val, \hash +.endm + +.macro PRECALC_RESET_WY + .set WY_00, WY0 + .set WY_04, WY4 + .set WY_08, WY08 + .set WY_12, WY12 + .set WY_16, WY16 + .set WY_20, WY20 + .set WY_24, WY24 + .set WY_28, WY28 + .set WY_32, WY_00 +.endm + +.macro PRECALC_ROTATE_WY + /* Rotate macros */ + .set WY_32, WY_28 + .set WY_28, WY_24 + .set WY_24, WY_20 + .set WY_20, WY_16 + .set WY_16, WY_12 + .set WY_12, WY_08 + .set WY_08, WY_04 + .set WY_04, WY_00 + .set WY_00, WY_32 + + /* Define register aliases */ + .set WY, WY_00 + .set WY_minus_04, WY_04 + .set WY_minus_08, WY_08 + .set WY_minus_12, WY_12 + .set WY_minus_16, WY_16 + .set WY_minus_20, WY_20 + .set WY_minus_24, WY_24 + .set WY_minus_28, WY_28 + .set WY_minus_32, WY +.endm + +.macro PRECALC_00_15 + .if (i == 0) # Initialize and rotate registers + PRECALC_RESET_WY + PRECALC_ROTATE_WY + .endif + + /* message scheduling pre-compute for rounds 0-15 */ + .if ((i & 7) == 0) + /* + * blended AVX2 and ALU instruction scheduling + * 1 vector iteration per 8 rounds + */ + vmovdqu ((i * 2) + PRECALC_OFFSET)(BUFFER_PTR), W_TMP + .elseif ((i & 7) == 1) + vinsertf128 $1, (((i-1) * 2)+PRECALC_OFFSET)(BUFFER_PTR2),\ + WY_TMP, WY_TMP + .elseif ((i & 7) == 2) + vpshufb YMM_SHUFB_BSWAP, WY_TMP, WY + .elseif ((i & 7) == 4) + vpaddd K_XMM(K_BASE), WY, WY_TMP + .elseif ((i & 7) == 7) + vmovdqu WY_TMP, PRECALC_WK(i&~7) + + PRECALC_ROTATE_WY + .endif +.endm + +.macro PRECALC_16_31 + /* + * message scheduling pre-compute for rounds 16-31 + * calculating last 32 w[i] values in 8 XMM registers + * pre-calculate K+w[i] values and store to mem + * for later load by ALU add instruction + * + * "brute force" vectorization for rounds 16-31 only + * due to w[i]->w[i-3] dependency + */ + .if ((i & 7) == 0) + /* + * blended AVX2 and ALU instruction scheduling + * 1 vector iteration per 8 rounds + */ + /* w[i-14] */ + vpalignr $8, WY_minus_16, WY_minus_12, WY + vpsrldq $4, WY_minus_04, WY_TMP /* w[i-3] */ + .elseif ((i & 7) == 1) + vpxor WY_minus_08, WY, WY + vpxor WY_minus_16, WY_TMP, WY_TMP + .elseif ((i & 7) == 2) + vpxor WY_TMP, WY, WY + vpslldq $12, WY, WY_TMP2 + .elseif ((i & 7) == 3) + vpslld $1, WY, WY_TMP + vpsrld $31, WY, WY + .elseif ((i & 7) == 4) + vpor WY, WY_TMP, WY_TMP + vpslld $2, WY_TMP2, WY + .elseif ((i & 7) == 5) + vpsrld $30, WY_TMP2, WY_TMP2 + vpxor WY, WY_TMP, WY_TMP + .elseif ((i & 7) == 7) + vpxor WY_TMP2, WY_TMP, WY + vpaddd K_XMM(K_BASE), WY, WY_TMP + vmovdqu WY_TMP, PRECALC_WK(i&~7) + + PRECALC_ROTATE_WY + .endif +.endm + +.macro PRECALC_32_79 + /* + * in SHA-1 specification: + * w[i] = (w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]) rol 1 + * instead we do equal: + * w[i] = (w[i-6] ^ w[i-16] ^ w[i-28] ^ w[i-32]) rol 2 + * allows more efficient vectorization + * since w[i]=>w[i-3] dependency is broken + */ + + .if ((i & 7) == 0) + /* + * blended AVX2 and ALU instruction scheduling + * 1 vector iteration per 8 rounds + */ + vpalignr $8, WY_minus_08, WY_minus_04, WY_TMP + .elseif ((i & 7) == 1) + /* W is W_minus_32 before xor */ + vpxor WY_minus_28, WY, WY + .elseif ((i & 7) == 2) + vpxor WY_minus_16, WY_TMP, WY_TMP + .elseif ((i & 7) == 3) + vpxor WY_TMP, WY, WY + .elseif ((i & 7) == 4) + vpslld $2, WY, WY_TMP + .elseif ((i & 7) == 5) + vpsrld $30, WY, WY + vpor WY, WY_TMP, WY + .elseif ((i & 7) == 7) + vpaddd K_XMM(K_BASE), WY, WY_TMP + vmovdqu WY_TMP, PRECALC_WK(i&~7) + + PRECALC_ROTATE_WY + .endif +.endm + +.macro PRECALC r, s + .set i, \r + + .if (i < 40) + .set K_XMM, 32*0 + .elseif (i < 80) + .set K_XMM, 32*1 + .elseif (i < 120) + .set K_XMM, 32*2 + .else + .set K_XMM, 32*3 + .endif + + .if (i<32) + PRECALC_00_15 \s + .elseif (i<64) + PRECALC_16_31 \s + .elseif (i < 160) + PRECALC_32_79 \s + .endif +.endm + +.macro ROTATE_STATE + .set T_REG, E + .set E, D + .set D, C + .set C, B + .set B, TB + .set TB, A + .set A, T_REG + + .set T_REG, RE + .set RE, RD + .set RD, RC + .set RC, RB + .set RB, RTB + .set RTB, RA + .set RA, T_REG +.endm + +/* Macro relies on saved ROUND_Fx */ + +.macro RND_FUN f, r + .if (\f == RND_F1) + ROUND_F1 \r + .elseif (\f == RND_F2) + ROUND_F2 \r + .elseif (\f == RND_F3) + ROUND_F3 \r + .endif +.endm + +.macro RR r + .set round_id, (\r % 80) + + .if (round_id == 0) /* Precalculate F for first round */ + .set ROUND_FUNC, RND_F1 + mov B, TB + + rorx $(32-30), B, B /* b>>>2 */ + andn D, TB, T1 + and C, TB + xor T1, TB + .endif + + RND_FUN ROUND_FUNC, \r + ROTATE_STATE + + .if (round_id == 18) + .set ROUND_FUNC, RND_F2 + .elseif (round_id == 38) + .set ROUND_FUNC, RND_F3 + .elseif (round_id == 58) + .set ROUND_FUNC, RND_F2 + .endif + + .set round_id, ( (\r+1) % 80) + + RND_FUN ROUND_FUNC, (\r+1) + ROTATE_STATE +.endm + +.macro ROUND_F1 r + add WK(\r), E + + andn C, A, T1 /* ~b&d */ + lea (RE,RTB), E /* Add F from the previous round */ + + rorx $(32-5), A, TA /* T2 = A >>> 5 */ + rorx $(32-30),A, TB /* b>>>2 for next round */ + + PRECALC (\r) /* msg scheduling for next 2 blocks */ + + /* + * Calculate F for the next round + * (b & c) ^ andn[b, d] + */ + and B, A /* b&c */ + xor T1, A /* F1 = (b&c) ^ (~b&d) */ + + lea (RE,RTA), E /* E += A >>> 5 */ +.endm + +.macro ROUND_F2 r + add WK(\r), E + lea (RE,RTB), E /* Add F from the previous round */ + + /* Calculate F for the next round */ + rorx $(32-5), A, TA /* T2 = A >>> 5 */ + .if ((round_id) < 79) + rorx $(32-30), A, TB /* b>>>2 for next round */ + .endif + PRECALC (\r) /* msg scheduling for next 2 blocks */ + + .if ((round_id) < 79) + xor B, A + .endif + + add TA, E /* E += A >>> 5 */ + + .if ((round_id) < 79) + xor C, A + .endif +.endm + +.macro ROUND_F3 r + add WK(\r), E + PRECALC (\r) /* msg scheduling for next 2 blocks */ + + lea (RE,RTB), E /* Add F from the previous round */ + + mov B, T1 + or A, T1 + + rorx $(32-5), A, TA /* T2 = A >>> 5 */ + rorx $(32-30), A, TB /* b>>>2 for next round */ + + /* Calculate F for the next round + * (b and c) or (d and (b or c)) + */ + and C, T1 + and B, A + or T1, A + + add TA, E /* E += A >>> 5 */ + +.endm + +/* + * macro implements 80 rounds of SHA-1, for multiple blocks with s/w pipelining + */ +.macro SHA1_PIPELINED_MAIN_BODY + + REGALLOC + + mov (HASH_PTR), A + mov 4(HASH_PTR), B + mov 8(HASH_PTR), C + mov 12(HASH_PTR), D + mov 16(HASH_PTR), E + + mov %rsp, PRECALC_BUF + lea (2*4*80+32)(%rsp), WK_BUF + + # Precalc WK for first 2 blocks + PRECALC_OFFSET = 0 + .set i, 0 + .rept 160 + PRECALC i + .set i, i + 1 + .endr + PRECALC_OFFSET = 128 + xchg WK_BUF, PRECALC_BUF + + .align 32 +_loop: + /* + * code loops through more than one block + * we use K_BASE value as a signal of a last block, + * it is set below by: cmovae BUFFER_PTR, K_BASE + */ + cmp K_BASE, BUFFER_PTR + jne _begin + .align 32 + jmp _end + .align 32 +_begin: + + /* + * Do first block + * rounds: 0,2,4,6,8 + */ + .set j, 0 + .rept 5 + RR j + .set j, j+2 + .endr + + jmp _loop0 +_loop0: + + /* + * rounds: + * 10,12,14,16,18 + * 20,22,24,26,28 + * 30,32,34,36,38 + * 40,42,44,46,48 + * 50,52,54,56,58 + */ + .rept 25 + RR j + .set j, j+2 + .endr + + add $(2*64), BUFFER_PTR /* move to next odd-64-byte block */ + cmp BUFFER_END, BUFFER_PTR /* is current block the last one? */ + cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */ + + /* + * rounds + * 60,62,64,66,68 + * 70,72,74,76,78 + */ + .rept 10 + RR j + .set j, j+2 + .endr + + UPDATE_HASH (HASH_PTR), A + UPDATE_HASH 4(HASH_PTR), TB + UPDATE_HASH 8(HASH_PTR), C + UPDATE_HASH 12(HASH_PTR), D + UPDATE_HASH 16(HASH_PTR), E + + cmp K_BASE, BUFFER_PTR /* is current block the last one? */ + je _loop + + mov TB, B + + /* Process second block */ + /* + * rounds + * 0+80, 2+80, 4+80, 6+80, 8+80 + * 10+80,12+80,14+80,16+80,18+80 + */ + + .set j, 0 + .rept 10 + RR j+80 + .set j, j+2 + .endr + + jmp _loop1 +_loop1: + /* + * rounds + * 20+80,22+80,24+80,26+80,28+80 + * 30+80,32+80,34+80,36+80,38+80 + */ + .rept 10 + RR j+80 + .set j, j+2 + .endr + + jmp _loop2 +_loop2: + + /* + * rounds + * 40+80,42+80,44+80,46+80,48+80 + * 50+80,52+80,54+80,56+80,58+80 + */ + .rept 10 + RR j+80 + .set j, j+2 + .endr + + add $(2*64), BUFFER_PTR2 /* move to next even-64-byte block */ + + cmp BUFFER_END, BUFFER_PTR2 /* is current block the last one */ + cmovae K_BASE, BUFFER_PTR /* signal the last iteration smartly */ + + jmp _loop3 +_loop3: + + /* + * rounds + * 60+80,62+80,64+80,66+80,68+80 + * 70+80,72+80,74+80,76+80,78+80 + */ + .rept 10 + RR j+80 + .set j, j+2 + .endr + + UPDATE_HASH (HASH_PTR), A + UPDATE_HASH 4(HASH_PTR), TB + UPDATE_HASH 8(HASH_PTR), C + UPDATE_HASH 12(HASH_PTR), D + UPDATE_HASH 16(HASH_PTR), E + + /* Reset state for AVX2 reg permutation */ + mov A, TA + mov TB, A + mov C, TB + mov E, C + mov D, B + mov TA, D + + REGALLOC + + xchg WK_BUF, PRECALC_BUF + + jmp _loop + + .align 32 + _end: + +.endm +/* + * macro implements SHA-1 function's body for several 64-byte blocks + * param: function's name + */ +.macro SHA1_VECTOR_ASM name + ENTRY(\name) + .align 4096 + + push %rbx + push %rbp + push %r12 + push %r13 + push %r14 + push %r15 + + RESERVE_STACK = (W_SIZE*4 + 8+24) + + /* Align stack */ + mov %rsp, %rbx + and $(0x1000-1), %rbx + sub $(8+32), %rbx + sub %rbx, %rsp + push %rbx + sub $RESERVE_STACK, %rsp + + avx2_zeroupper + + lea K_XMM_AR(%rip), K_BASE + + mov CTX, HASH_PTR + mov BUF, BUFFER_PTR + lea 64(BUF), BUFFER_PTR2 + + shl $6, CNT /* mul by 64 */ + add BUF, CNT + add $64, CNT + mov CNT, BUFFER_END + + cmp BUFFER_END, BUFFER_PTR2 + cmovae K_BASE, BUFFER_PTR2 + + xmm_mov BSWAP_SHUFB_CTL(%rip), YMM_SHUFB_BSWAP + + SHA1_PIPELINED_MAIN_BODY + + avx2_zeroupper + + add $RESERVE_STACK, %rsp + pop %rbx + add %rbx, %rsp + + pop %r15 + pop %r14 + pop %r13 + pop %r12 + pop %rbp + pop %rbx + + ret + + ENDPROC(\name) +.endm + +.section .rodata + +#define K1 0x5a827999 +#define K2 0x6ed9eba1 +#define K3 0x8f1bbcdc +#define K4 0xca62c1d6 + +.align 128 +K_XMM_AR: + .long K1, K1, K1, K1 + .long K1, K1, K1, K1 + .long K2, K2, K2, K2 + .long K2, K2, K2, K2 + .long K3, K3, K3, K3 + .long K3, K3, K3, K3 + .long K4, K4, K4, K4 + .long K4, K4, K4, K4 + +BSWAP_SHUFB_CTL: + .long 0x00010203 + .long 0x04050607 + .long 0x08090a0b + .long 0x0c0d0e0f + .long 0x00010203 + .long 0x04050607 + .long 0x08090a0b + .long 0x0c0d0e0f +.text + +SHA1_VECTOR_ASM sha1_transform_avx2 diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 4a11a9d72451..139a55c04d82 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -10,6 +10,7 @@ * Copyright (c) Andrew McDonald * Copyright (c) Jean-Francois Dive * Copyright (c) Mathias Krause + * Copyright (c) Chandramouli Narayanan * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -39,6 +40,12 @@ asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, asmlinkage void sha1_transform_avx(u32 *digest, const char *data, unsigned int rounds); #endif +#ifdef CONFIG_AS_AVX2 +#define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */ + +asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, + unsigned int rounds); +#endif static asmlinkage void (*sha1_transform_asm)(u32 *, const char *, unsigned int); @@ -165,6 +172,18 @@ static int sha1_ssse3_import(struct shash_desc *desc, const void *in) return 0; } +#ifdef CONFIG_AS_AVX2 +static void sha1_apply_transform_avx2(u32 *digest, const char *data, + unsigned int rounds) +{ + /* Select the optimal transform based on data block size */ + if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE) + sha1_transform_avx2(digest, data, rounds); + else + sha1_transform_avx(digest, data, rounds); +} +#endif + static struct shash_alg alg = { .digestsize = SHA1_DIGEST_SIZE, .init = sha1_ssse3_init, @@ -189,7 +208,11 @@ static bool __init avx_usable(void) { u64 xcr0; +#if defined(CONFIG_AS_AVX2) + if (!cpu_has_avx || !cpu_has_avx2 || !cpu_has_osxsave) +#else if (!cpu_has_avx || !cpu_has_osxsave) +#endif return false; xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); @@ -205,23 +228,35 @@ static bool __init avx_usable(void) static int __init sha1_ssse3_mod_init(void) { + char *algo_name; /* test for SSSE3 first */ - if (cpu_has_ssse3) + if (cpu_has_ssse3) { sha1_transform_asm = sha1_transform_ssse3; + algo_name = "SSSE3"; + } #ifdef CONFIG_AS_AVX /* allow AVX to override SSSE3, it's a little faster */ - if (avx_usable()) - sha1_transform_asm = sha1_transform_avx; + if (avx_usable()) { + if (cpu_has_avx) { + sha1_transform_asm = sha1_transform_avx; + algo_name = "AVX"; + } +#ifdef CONFIG_AS_AVX2 + if (cpu_has_avx2 && boot_cpu_has(X86_FEATURE_BMI2)) { + /* allow AVX2 to override AVX, it's a little faster */ + sha1_transform_asm = sha1_apply_transform_avx2; + algo_name = "AVX2"; + } +#endif + } #endif if (sha1_transform_asm) { - pr_info("Using %s optimized SHA-1 implementation\n", - sha1_transform_asm == sha1_transform_ssse3 ? "SSSE3" - : "AVX"); + pr_info("Using %s optimized SHA-1 implementation\n", algo_name); return crypto_register_shash(&alg); } - pr_info("Neither AVX nor SSSE3 is available/usable.\n"); + pr_info("Neither AVX nor AVX2 nor SSSE3 is available/usable.\n"); return -ENODEV; } diff --git a/crypto/Kconfig b/crypto/Kconfig index 7bcb70d216e1..ce4012a58781 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -491,14 +491,14 @@ config CRYPTO_SHA1 SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2). config CRYPTO_SHA1_SSSE3 - tristate "SHA1 digest algorithm (SSSE3/AVX)" + tristate "SHA1 digest algorithm (SSSE3/AVX/AVX2)" depends on X86 && 64BIT select CRYPTO_SHA1 select CRYPTO_HASH help SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented using Supplemental SSE3 (SSSE3) instructions or Advanced Vector - Extensions (AVX), when available. + Extensions (AVX/AVX2), when available. config CRYPTO_SHA256_SSSE3 tristate "SHA256 digest algorithm (SSSE3/AVX/AVX2)" From 6ca5afb8c26991cf4f13a8bcca870ec2a9522bf7 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 24 Mar 2014 17:10:37 +0100 Subject: [PATCH 57/60] crypto: x86/sha1 - re-enable the AVX variant Commit 7c1da8d0d0 "crypto: sha - SHA1 transform x86_64 AVX2" accidentally disabled the AVX variant by making the avx_usable() test not only fail in case the CPU doesn't support AVX or OSXSAVE but also if it doesn't support AVX2. Fix that regression by splitting up the AVX/AVX2 test into two functions. Also test for the BMI1 extension in the avx2_usable() test as the AVX2 implementation not only makes use of BMI2 but also BMI1 instructions. Cc: Chandramouli Narayanan Signed-off-by: Mathias Krause Reviewed-by: H. Peter Anvin Reviewed-by: Marek Vasut Signed-off-by: Herbert Xu --- arch/x86/crypto/sha1_ssse3_glue.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 139a55c04d82..74d16ef707c7 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -208,11 +208,7 @@ static bool __init avx_usable(void) { u64 xcr0; -#if defined(CONFIG_AS_AVX2) - if (!cpu_has_avx || !cpu_has_avx2 || !cpu_has_osxsave) -#else if (!cpu_has_avx || !cpu_has_osxsave) -#endif return false; xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK); @@ -224,11 +220,23 @@ static bool __init avx_usable(void) return true; } + +#ifdef CONFIG_AS_AVX2 +static bool __init avx2_usable(void) +{ + if (avx_usable() && cpu_has_avx2 && boot_cpu_has(X86_FEATURE_BMI1) && + boot_cpu_has(X86_FEATURE_BMI2)) + return true; + + return false; +} +#endif #endif static int __init sha1_ssse3_mod_init(void) { char *algo_name; + /* test for SSSE3 first */ if (cpu_has_ssse3) { sha1_transform_asm = sha1_transform_ssse3; @@ -238,13 +246,11 @@ static int __init sha1_ssse3_mod_init(void) #ifdef CONFIG_AS_AVX /* allow AVX to override SSSE3, it's a little faster */ if (avx_usable()) { - if (cpu_has_avx) { - sha1_transform_asm = sha1_transform_avx; - algo_name = "AVX"; - } + sha1_transform_asm = sha1_transform_avx; + algo_name = "AVX"; #ifdef CONFIG_AS_AVX2 - if (cpu_has_avx2 && boot_cpu_has(X86_FEATURE_BMI2)) { - /* allow AVX2 to override AVX, it's a little faster */ + /* allow AVX2 to override AVX, it's a little faster */ + if (avx2_usable()) { sha1_transform_asm = sha1_apply_transform_avx2; algo_name = "AVX2"; } From 6c8c17cc7a8806dde074d7c0bf4d519dd4d028c5 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 24 Mar 2014 17:10:38 +0100 Subject: [PATCH 58/60] crypto: x86/sha1 - fix stack alignment of AVX2 variant The AVX2 implementation might waste up to a page of stack memory because of a wrong alignment calculation. This will, in the worst case, increase the stack usage of sha1_transform_avx2() alone to 5.4 kB -- way to big for a kernel function. Even worse, it might also allocate *less* bytes than needed if the stack pointer is already aligned bacause in that case the 'sub %rbx, %rsp' is effectively moving the stack pointer upwards, not downwards. Fix those issues by changing and simplifying the alignment calculation to use a 32 byte alignment, the alignment really needed. Cc: Chandramouli Narayanan Signed-off-by: Mathias Krause Reviewed-by: H. Peter Anvin Reviewed-by: Marek Vasut Signed-off-by: Herbert Xu --- arch/x86/crypto/sha1_avx2_x86_64_asm.S | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S index 4f348544d132..bacac22b20c2 100644 --- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S +++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S @@ -636,9 +636,7 @@ _loop3: /* Align stack */ mov %rsp, %rbx - and $(0x1000-1), %rbx - sub $(8+32), %rbx - sub %rbx, %rsp + and $~(0x20-1), %rsp push %rbx sub $RESERVE_STACK, %rsp @@ -665,8 +663,7 @@ _loop3: avx2_zeroupper add $RESERVE_STACK, %rsp - pop %rbx - add %rbx, %rsp + pop %rsp pop %r15 pop %r14 From 37b28947170ebe55cb4f689ded5857d943c6578e Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Mon, 24 Mar 2014 17:10:39 +0100 Subject: [PATCH 59/60] crypto: x86/sha1 - reduce size of the AVX2 asm implementation There is really no need to page align sha1_transform_avx2. The default alignment is just fine. This is not the hot code but only the entry point, after all. Cc: Chandramouli Narayanan Signed-off-by: Mathias Krause Reviewed-by: H. Peter Anvin Reviewed-by: Marek Vasut Signed-off-by: Herbert Xu --- arch/x86/crypto/sha1_avx2_x86_64_asm.S | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/crypto/sha1_avx2_x86_64_asm.S b/arch/x86/crypto/sha1_avx2_x86_64_asm.S index bacac22b20c2..1cd792db15ef 100644 --- a/arch/x86/crypto/sha1_avx2_x86_64_asm.S +++ b/arch/x86/crypto/sha1_avx2_x86_64_asm.S @@ -623,7 +623,6 @@ _loop3: */ .macro SHA1_VECTOR_ASM name ENTRY(\name) - .align 4096 push %rbx push %rbp From 8ceee72808d1ae3fb191284afc2257a2be964725 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 27 Mar 2014 18:14:40 +0100 Subject: [PATCH 60/60] crypto: ghash-clmulni-intel - use C implementation for setkey() The GHASH setkey() function uses SSE registers but fails to call kernel_fpu_begin()/kernel_fpu_end(). Instead of adding these calls, and then having to deal with the restriction that they cannot be called from interrupt context, move the setkey() implementation to the C domain. Note that setkey() does not use any particular SSE features and is not expected to become a performance bottleneck. Signed-off-by: Ard Biesheuvel Acked-by: H. Peter Anvin Fixes: 0e1227d356e9b (crypto: ghash - Add PCLMULQDQ accelerated implementation) Signed-off-by: Herbert Xu --- arch/x86/crypto/ghash-clmulni-intel_asm.S | 29 ---------------------- arch/x86/crypto/ghash-clmulni-intel_glue.c | 14 ++++++++--- 2 files changed, 11 insertions(+), 32 deletions(-) diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index 586f41aac361..185fad49d86f 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -24,10 +24,6 @@ .align 16 .Lbswap_mask: .octa 0x000102030405060708090a0b0c0d0e0f -.Lpoly: - .octa 0xc2000000000000000000000000000001 -.Ltwo_one: - .octa 0x00000001000000000000000000000001 #define DATA %xmm0 #define SHASH %xmm1 @@ -134,28 +130,3 @@ ENTRY(clmul_ghash_update) .Lupdate_just_ret: ret ENDPROC(clmul_ghash_update) - -/* - * void clmul_ghash_setkey(be128 *shash, const u8 *key); - * - * Calculate hash_key << 1 mod poly - */ -ENTRY(clmul_ghash_setkey) - movaps .Lbswap_mask, BSWAP - movups (%rsi), %xmm0 - PSHUFB_XMM BSWAP %xmm0 - movaps %xmm0, %xmm1 - psllq $1, %xmm0 - psrlq $63, %xmm1 - movaps %xmm1, %xmm2 - pslldq $8, %xmm1 - psrldq $8, %xmm2 - por %xmm1, %xmm0 - # reduction - pshufd $0b00100100, %xmm2, %xmm1 - pcmpeqd .Ltwo_one, %xmm1 - pand .Lpoly, %xmm1 - pxor %xmm1, %xmm0 - movups %xmm0, (%rdi) - ret -ENDPROC(clmul_ghash_setkey) diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 6759dd1135be..d785cf2c529c 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -30,8 +30,6 @@ void clmul_ghash_mul(char *dst, const be128 *shash); void clmul_ghash_update(char *dst, const char *src, unsigned int srclen, const be128 *shash); -void clmul_ghash_setkey(be128 *shash, const u8 *key); - struct ghash_async_ctx { struct cryptd_ahash *cryptd_tfm; }; @@ -58,13 +56,23 @@ static int ghash_setkey(struct crypto_shash *tfm, const u8 *key, unsigned int keylen) { struct ghash_ctx *ctx = crypto_shash_ctx(tfm); + be128 *x = (be128 *)key; + u64 a, b; if (keylen != GHASH_BLOCK_SIZE) { crypto_shash_set_flags(tfm, CRYPTO_TFM_RES_BAD_KEY_LEN); return -EINVAL; } - clmul_ghash_setkey(&ctx->shash, key); + /* perform multiplication by 'x' in GF(2^128) */ + a = be64_to_cpu(x->a); + b = be64_to_cpu(x->b); + + ctx->shash.a = (__be64)((b << 1) | (a >> 63)); + ctx->shash.b = (__be64)((a << 1) | (b >> 63)); + + if (a >> 63) + ctx->shash.b ^= cpu_to_be64(0xc2); return 0; }