From 981a2e3e45c554611316b6eb28d00204586637b3 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Tue, 25 Apr 2017 11:25:40 +0300 Subject: crypto: tcrypt - don't disable irqs and wait MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The tcrypt AEAD cycles speed tests disables irqs during the test, which is broken at the very least since commit '1425d2d17f7309c6 ("crypto: tcrypt - Fix AEAD speed tests")' adds a wait for completion as part of the test and probably since switching to the new AEAD API. While the result of taking a cycle count diff may not mean much on SMP systems if the task migrates, it's good enough for tcrypt being the quick & dirty dev tool it is. It's also what all the other (i.e. hash) cycle speed tests do. Signed-off-by: Gilad Ben-Yossef Reported-by: Ofir Drang Reviewed-by: Horia Geantă Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 9a11f3c2bf98..0dd6a432d6ca 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -138,8 +138,6 @@ static int test_aead_cycles(struct aead_request *req, int enc, int blen) int ret = 0; int i; - local_irq_disable(); - /* Warm-up run. */ for (i = 0; i < 4; i++) { if (enc) @@ -169,8 +167,6 @@ static int test_aead_cycles(struct aead_request *req, int enc, int blen) } out: - local_irq_enable(); - if (ret == 0) printk("1 operation in %lu cycles (%d bytes)\n", (cycles + 4) / 8, blen); -- cgit v1.2.3 From 15a9b363de232236a31922d5a928d1830bc42060 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Tue, 25 Apr 2017 12:18:54 +0300 Subject: crypto: sha512-mb - add some missing unlock on error We recently added some new locking but missed the unlocks on these error paths in sha512_ctx_mgr_submit(). Fixes: c459bd7beda0 ("crypto: sha512-mb - Protect sha512 mb ctx mgr access") Signed-off-by: Dan Carpenter Acked-by: Tim Chen Signed-off-by: Herbert Xu --- arch/x86/crypto/sha512-mb/sha512_mb.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c index 2dd3674b5a1e..458409b7568d 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb.c +++ b/arch/x86/crypto/sha512-mb/sha512_mb.c @@ -269,19 +269,19 @@ static struct sha512_hash_ctx * LAST */ ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; - return ctx; + goto unlock; } if (ctx->status & HASH_CTX_STS_PROCESSING) { /* Cannot submit to a currently processing job. */ ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; - return ctx; + goto unlock; } if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) { /* Cannot update a finished job. */ ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; - return ctx; + goto unlock; } @@ -363,6 +363,7 @@ static struct sha512_hash_ctx } ctx = sha512_ctx_mgr_resubmit(mgr, ctx); +unlock: spin_unlock_irqrestore(&cstate->work_lock, irqflags); return ctx; } -- cgit v1.2.3 From 7fcaf62a9f6348bcdc8d2816f93f64cf7f3f87fc Mon Sep 17 00:00:00 2001 From: Tudor Ambarus Date: Tue, 25 Apr 2017 16:26:36 +0300 Subject: crypto: caam - avoid kzalloc(0) in caam_read_raw_data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function returns NULL if buf is composed only of zeros. Signed-off-by: Tudor Ambarus Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caampkc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index 49cbdcba7883..999ba18495b0 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -374,6 +374,8 @@ static inline u8 *caam_read_raw_data(const u8 *buf, size_t *nbytes) buf++; (*nbytes)--; } + if (!*nbytes) + return NULL; val = kzalloc(*nbytes, GFP_DMA | GFP_KERNEL); if (!val) -- cgit v1.2.3 From 7ca4a9a10fe82ee50ce0da02c72791ecf7c83869 Mon Sep 17 00:00:00 2001 From: Radu Alexe Date: Tue, 25 Apr 2017 16:26:37 +0300 Subject: crypto: caam - incapsulate dropping leading zeros into function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This function will be used into further patches. Signed-off-by: Radu Alexe Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caampkc.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index 999ba18495b0..d2c6977ba82e 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -357,6 +357,14 @@ static void caam_rsa_free_key(struct caam_rsa_key *key) key->n_sz = 0; } +static void caam_rsa_drop_leading_zeros(const u8 **ptr, size_t *nbytes) +{ + while (!**ptr && *nbytes) { + (*ptr)++; + (*nbytes)--; + } +} + /** * caam_read_raw_data - Read a raw byte stream as a positive integer. * The function skips buffer's leading zeros, copies the remained data @@ -370,10 +378,7 @@ static inline u8 *caam_read_raw_data(const u8 *buf, size_t *nbytes) { u8 *val; - while (!*buf && *nbytes) { - buf++; - (*nbytes)--; - } + caam_rsa_drop_leading_zeros(&buf, nbytes); if (!*nbytes) return NULL; -- cgit v1.2.3 From 52e26d77b8b3de2e9ed6c7126f68e04cad5fe852 Mon Sep 17 00:00:00 2001 From: Radu Alexe Date: Tue, 25 Apr 2017 16:26:38 +0300 Subject: crypto: caam - add support for RSA key form 2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CAAM RSA private key may have either of three representations. 1. The first representation consists of the pair (n, d), where the components have the following meanings: n the RSA modulus d the RSA private exponent 2. The second representation consists of the triplet (p, q, d), where the components have the following meanings: p the first prime factor of the RSA modulus n q the second prime factor of the RSA modulus n d the RSA private exponent 3. The third representation consists of the quintuple (p, q, dP, dQ, qInv), where the components have the following meanings: p the first prime factor of the RSA modulus n q the second prime factor of the RSA modulus n dP the first factors's CRT exponent dQ the second factors's CRT exponent qInv the (first) CRT coefficient The benefit of using the third or the second key form is lower computational cost for the decryption and signature operations. This patch adds support for the second RSA private key representation. Signed-off-by: Tudor Ambarus Signed-off-by: Radu Alexe Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caampkc.c | 231 ++++++++++++++++++++++++++++++++++++++--- drivers/crypto/caam/caampkc.h | 38 +++++++ drivers/crypto/caam/pdb.h | 29 ++++++ drivers/crypto/caam/pkc_desc.c | 17 +++ 4 files changed, 298 insertions(+), 17 deletions(-) diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index d2c6977ba82e..912e41700522 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -18,6 +18,8 @@ #define DESC_RSA_PUB_LEN (2 * CAAM_CMD_SZ + sizeof(struct rsa_pub_pdb)) #define DESC_RSA_PRIV_F1_LEN (2 * CAAM_CMD_SZ + \ sizeof(struct rsa_priv_f1_pdb)) +#define DESC_RSA_PRIV_F2_LEN (2 * CAAM_CMD_SZ + \ + sizeof(struct rsa_priv_f2_pdb)) static void rsa_io_unmap(struct device *dev, struct rsa_edesc *edesc, struct akcipher_request *req) @@ -54,6 +56,23 @@ static void rsa_priv_f1_unmap(struct device *dev, struct rsa_edesc *edesc, dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE); } +static void rsa_priv_f2_unmap(struct device *dev, struct rsa_edesc *edesc, + struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct rsa_priv_f2_pdb *pdb = &edesc->pdb.priv_f2; + size_t p_sz = key->p_sz; + size_t q_sz = key->p_sz; + + dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_TO_DEVICE); +} + /* RSA Job Completion handler */ static void rsa_pub_done(struct device *dev, u32 *desc, u32 err, void *context) { @@ -90,6 +109,24 @@ static void rsa_priv_f1_done(struct device *dev, u32 *desc, u32 err, akcipher_request_complete(req, err); } +static void rsa_priv_f2_done(struct device *dev, u32 *desc, u32 err, + void *context) +{ + struct akcipher_request *req = context; + struct rsa_edesc *edesc; + + if (err) + caam_jr_strstatus(dev, err); + + edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); + + rsa_priv_f2_unmap(dev, edesc, req); + rsa_io_unmap(dev, edesc, req); + kfree(edesc); + + akcipher_request_complete(req, err); +} + static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req, size_t desclen) { @@ -258,6 +295,81 @@ static int set_rsa_priv_f1_pdb(struct akcipher_request *req, return 0; } +static int set_rsa_priv_f2_pdb(struct akcipher_request *req, + struct rsa_edesc *edesc) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct device *dev = ctx->dev; + struct rsa_priv_f2_pdb *pdb = &edesc->pdb.priv_f2; + int sec4_sg_index = 0; + size_t p_sz = key->p_sz; + size_t q_sz = key->p_sz; + + pdb->d_dma = dma_map_single(dev, key->d, key->d_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->d_dma)) { + dev_err(dev, "Unable to map RSA private exponent memory\n"); + return -ENOMEM; + } + + pdb->p_dma = dma_map_single(dev, key->p, p_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->p_dma)) { + dev_err(dev, "Unable to map RSA prime factor p memory\n"); + goto unmap_d; + } + + pdb->q_dma = dma_map_single(dev, key->q, q_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->q_dma)) { + dev_err(dev, "Unable to map RSA prime factor q memory\n"); + goto unmap_p; + } + + pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->tmp1_dma)) { + dev_err(dev, "Unable to map RSA tmp1 memory\n"); + goto unmap_q; + } + + pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->tmp2_dma)) { + dev_err(dev, "Unable to map RSA tmp2 memory\n"); + goto unmap_tmp1; + } + + if (edesc->src_nents > 1) { + pdb->sgf |= RSA_PRIV_PDB_SGF_G; + pdb->g_dma = edesc->sec4_sg_dma; + sec4_sg_index += edesc->src_nents; + } else { + pdb->g_dma = sg_dma_address(req->src); + } + + if (edesc->dst_nents > 1) { + pdb->sgf |= RSA_PRIV_PDB_SGF_F; + pdb->f_dma = edesc->sec4_sg_dma + + sec4_sg_index * sizeof(struct sec4_sg_entry); + } else { + pdb->f_dma = sg_dma_address(req->dst); + } + + pdb->sgf |= (key->d_sz << RSA_PDB_D_SHIFT) | key->n_sz; + pdb->p_q_len = (q_sz << RSA_PDB_Q_SHIFT) | p_sz; + + return 0; + +unmap_tmp1: + dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE); +unmap_q: + dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE); +unmap_p: + dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE); +unmap_d: + dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE); + + return -ENOMEM; +} + static int caam_rsa_enc(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); @@ -301,24 +413,14 @@ init_fail: return ret; } -static int caam_rsa_dec(struct akcipher_request *req) +static int caam_rsa_dec_priv_f1(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); - struct caam_rsa_key *key = &ctx->key; struct device *jrdev = ctx->dev; struct rsa_edesc *edesc; int ret; - if (unlikely(!key->n || !key->d)) - return -EINVAL; - - if (req->dst_len < key->n_sz) { - req->dst_len = key->n_sz; - dev_err(jrdev, "Output buffer length less than parameter n\n"); - return -EOVERFLOW; - } - /* Allocate extended descriptor */ edesc = rsa_edesc_alloc(req, DESC_RSA_PRIV_F1_LEN); if (IS_ERR(edesc)) @@ -344,17 +446,73 @@ init_fail: return ret; } +static int caam_rsa_dec_priv_f2(struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct device *jrdev = ctx->dev; + struct rsa_edesc *edesc; + int ret; + + /* Allocate extended descriptor */ + edesc = rsa_edesc_alloc(req, DESC_RSA_PRIV_F2_LEN); + if (IS_ERR(edesc)) + return PTR_ERR(edesc); + + /* Set RSA Decrypt Protocol Data Block - Private Key Form #2 */ + ret = set_rsa_priv_f2_pdb(req, edesc); + if (ret) + goto init_fail; + + /* Initialize Job Descriptor */ + init_rsa_priv_f2_desc(edesc->hw_desc, &edesc->pdb.priv_f2); + + ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f2_done, req); + if (!ret) + return -EINPROGRESS; + + rsa_priv_f2_unmap(jrdev, edesc, req); + +init_fail: + rsa_io_unmap(jrdev, edesc, req); + kfree(edesc); + return ret; +} + +static int caam_rsa_dec(struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + int ret; + + if (unlikely(!key->n || !key->d)) + return -EINVAL; + + if (req->dst_len < key->n_sz) { + req->dst_len = key->n_sz; + dev_err(ctx->dev, "Output buffer length less than parameter n\n"); + return -EOVERFLOW; + } + + if (key->priv_form == FORM2) + ret = caam_rsa_dec_priv_f2(req); + else + ret = caam_rsa_dec_priv_f1(req); + + return ret; +} + static void caam_rsa_free_key(struct caam_rsa_key *key) { kzfree(key->d); + kzfree(key->p); + kzfree(key->q); + kzfree(key->tmp1); + kzfree(key->tmp2); kfree(key->e); kfree(key->n); - key->d = NULL; - key->e = NULL; - key->n = NULL; - key->d_sz = 0; - key->e_sz = 0; - key->n_sz = 0; + memset(key, 0, sizeof(*key)); } static void caam_rsa_drop_leading_zeros(const u8 **ptr, size_t *nbytes) @@ -444,6 +602,43 @@ err: return -ENOMEM; } +static void caam_rsa_set_priv_key_form(struct caam_rsa_ctx *ctx, + struct rsa_key *raw_key) +{ + struct caam_rsa_key *rsa_key = &ctx->key; + size_t p_sz = raw_key->p_sz; + size_t q_sz = raw_key->q_sz; + + rsa_key->p = caam_read_raw_data(raw_key->p, &p_sz); + if (!rsa_key->p) + return; + rsa_key->p_sz = p_sz; + + rsa_key->q = caam_read_raw_data(raw_key->q, &q_sz); + if (!rsa_key->q) + goto free_p; + rsa_key->q_sz = q_sz; + + rsa_key->tmp1 = kzalloc(raw_key->p_sz, GFP_DMA | GFP_KERNEL); + if (!rsa_key->tmp1) + goto free_q; + + rsa_key->tmp2 = kzalloc(raw_key->q_sz, GFP_DMA | GFP_KERNEL); + if (!rsa_key->tmp2) + goto free_tmp1; + + rsa_key->priv_form = FORM2; + + return; + +free_tmp1: + kzfree(rsa_key->tmp1); +free_q: + kzfree(rsa_key->q); +free_p: + kzfree(rsa_key->p); +} + static int caam_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key, unsigned int keylen) { @@ -490,6 +685,8 @@ static int caam_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key, memcpy(rsa_key->d, raw_key.d, raw_key.d_sz); memcpy(rsa_key->e, raw_key.e, raw_key.e_sz); + caam_rsa_set_priv_key_form(ctx, &raw_key); + return 0; err: diff --git a/drivers/crypto/caam/caampkc.h b/drivers/crypto/caam/caampkc.h index f595d159b112..48e408ceaf75 100644 --- a/drivers/crypto/caam/caampkc.h +++ b/drivers/crypto/caam/caampkc.h @@ -12,22 +12,58 @@ #include "compat.h" #include "pdb.h" +/** + * caam_priv_key_form - CAAM RSA private key representation + * CAAM RSA private key may have either of two forms. + * + * 1. The first representation consists of the pair (n, d), where the + * components have the following meanings: + * n the RSA modulus + * d the RSA private exponent + * + * 2. The second representation consists of the triplet (p, q, d), where the + * components have the following meanings: + * p the first prime factor of the RSA modulus n + * q the second prime factor of the RSA modulus n + * d the RSA private exponent + */ +enum caam_priv_key_form { + FORM1, + FORM2, +}; + /** * caam_rsa_key - CAAM RSA key structure. Keys are allocated in DMA zone. * @n : RSA modulus raw byte stream * @e : RSA public exponent raw byte stream * @d : RSA private exponent raw byte stream + * @p : RSA prime factor p of RSA modulus n + * @q : RSA prime factor q of RSA modulus n + * @tmp1 : CAAM uses this temporary buffer as internal state buffer. + * It is assumed to be as long as p. + * @tmp2 : CAAM uses this temporary buffer as internal state buffer. + * It is assumed to be as long as q. * @n_sz : length in bytes of RSA modulus n * @e_sz : length in bytes of RSA public exponent * @d_sz : length in bytes of RSA private exponent + * @p_sz : length in bytes of RSA prime factor p of RSA modulus n + * @q_sz : length in bytes of RSA prime factor q of RSA modulus n + * @priv_form : CAAM RSA private key representation */ struct caam_rsa_key { u8 *n; u8 *e; u8 *d; + u8 *p; + u8 *q; + u8 *tmp1; + u8 *tmp2; size_t n_sz; size_t e_sz; size_t d_sz; + size_t p_sz; + size_t q_sz; + enum caam_priv_key_form priv_form; }; /** @@ -59,6 +95,7 @@ struct rsa_edesc { union { struct rsa_pub_pdb pub; struct rsa_priv_f1_pdb priv_f1; + struct rsa_priv_f2_pdb priv_f2; } pdb; u32 hw_desc[]; }; @@ -66,5 +103,6 @@ struct rsa_edesc { /* Descriptor construction primitives. */ void init_rsa_pub_desc(u32 *desc, struct rsa_pub_pdb *pdb); void init_rsa_priv_f1_desc(u32 *desc, struct rsa_priv_f1_pdb *pdb); +void init_rsa_priv_f2_desc(u32 *desc, struct rsa_priv_f2_pdb *pdb); #endif diff --git a/drivers/crypto/caam/pdb.h b/drivers/crypto/caam/pdb.h index aaa00dd1c601..8ea5e612927e 100644 --- a/drivers/crypto/caam/pdb.h +++ b/drivers/crypto/caam/pdb.h @@ -483,6 +483,8 @@ struct dsa_verify_pdb { #define RSA_PDB_E_MASK (0xFFF << RSA_PDB_E_SHIFT) #define RSA_PDB_D_SHIFT 12 #define RSA_PDB_D_MASK (0xFFF << RSA_PDB_D_SHIFT) +#define RSA_PDB_Q_SHIFT 12 +#define RSA_PDB_Q_MASK (0xFFF << RSA_PDB_Q_SHIFT) #define RSA_PDB_SGF_F (0x8 << RSA_PDB_SGF_SHIFT) #define RSA_PDB_SGF_G (0x4 << RSA_PDB_SGF_SHIFT) @@ -490,6 +492,7 @@ struct dsa_verify_pdb { #define RSA_PRIV_PDB_SGF_G (0x8 << RSA_PDB_SGF_SHIFT) #define RSA_PRIV_KEY_FRM_1 0 +#define RSA_PRIV_KEY_FRM_2 1 /** * RSA Encrypt Protocol Data Block @@ -525,4 +528,30 @@ struct rsa_priv_f1_pdb { dma_addr_t d_dma; } __packed; +/** + * RSA Decrypt PDB - Private Key Form #2 + * @sgf : scatter-gather field + * @g_dma : dma address of encrypted input data + * @f_dma : dma address of output data + * @d_dma : dma address of RSA private exponent + * @p_dma : dma address of RSA prime factor p of RSA modulus n + * @q_dma : dma address of RSA prime factor q of RSA modulus n + * @tmp1_dma: dma address of temporary buffer. CAAM uses this temporary buffer + * as internal state buffer. It is assumed to be as long as p. + * @tmp2_dma: dma address of temporary buffer. CAAM uses this temporary buffer + * as internal state buffer. It is assumed to be as long as q. + * @p_q_len : length in bytes of first two prime factors of the RSA modulus n + */ +struct rsa_priv_f2_pdb { + u32 sgf; + dma_addr_t g_dma; + dma_addr_t f_dma; + dma_addr_t d_dma; + dma_addr_t p_dma; + dma_addr_t q_dma; + dma_addr_t tmp1_dma; + dma_addr_t tmp2_dma; + u32 p_q_len; +} __packed; + #endif diff --git a/drivers/crypto/caam/pkc_desc.c b/drivers/crypto/caam/pkc_desc.c index 4e4183e615ea..9d03c7055ed7 100644 --- a/drivers/crypto/caam/pkc_desc.c +++ b/drivers/crypto/caam/pkc_desc.c @@ -34,3 +34,20 @@ void init_rsa_priv_f1_desc(u32 *desc, struct rsa_priv_f1_pdb *pdb) append_operation(desc, OP_TYPE_UNI_PROTOCOL | OP_PCLID_RSADEC_PRVKEY | RSA_PRIV_KEY_FRM_1); } + +/* Descriptor for RSA Private operation - Private Key Form #2 */ +void init_rsa_priv_f2_desc(u32 *desc, struct rsa_priv_f2_pdb *pdb) +{ + init_job_desc_pdb(desc, 0, sizeof(*pdb)); + append_cmd(desc, pdb->sgf); + append_ptr(desc, pdb->g_dma); + append_ptr(desc, pdb->f_dma); + append_ptr(desc, pdb->d_dma); + append_ptr(desc, pdb->p_dma); + append_ptr(desc, pdb->q_dma); + append_ptr(desc, pdb->tmp1_dma); + append_ptr(desc, pdb->tmp2_dma); + append_cmd(desc, pdb->p_q_len); + append_operation(desc, OP_TYPE_UNI_PROTOCOL | OP_PCLID_RSADEC_PRVKEY | + RSA_PRIV_KEY_FRM_2); +} -- cgit v1.2.3 From 4a651b122adb8d3a9cb1c5dddfd63e708ee9bdd2 Mon Sep 17 00:00:00 2001 From: Radu Alexe Date: Tue, 25 Apr 2017 16:26:39 +0300 Subject: crypto: caam - add support for RSA key form 3 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CAAM RSA private key may have either of three representations. 1. The first representation consists of the pair (n, d), where the components have the following meanings: n the RSA modulus d the RSA private exponent 2. The second representation consists of the triplet (p, q, d), where the components have the following meanings: p the first prime factor of the RSA modulus n q the second prime factor of the RSA modulus n d the RSA private exponent 3. The third representation consists of the quintuple (p, q, dP, dQ, qInv), where the components have the following meanings: p the first prime factor of the RSA modulus n q the second prime factor of the RSA modulus n dP the first factors's CRT exponent dQ the second factors's CRT exponent qInv the (first) CRT coefficient The benefit of using the third or the second key form is lower computational cost for the decryption and signature operations. This patch adds support for the third RSA private key representations and extends caampkc to use the fastest key when all related components are present in the private key. Signed-off-by: Tudor Ambarus Signed-off-by: Radu Alexe Signed-off-by: Horia Geantă Signed-off-by: Herbert Xu --- drivers/crypto/caam/caampkc.c | 219 ++++++++++++++++++++++++++++++++++++++++- drivers/crypto/caam/caampkc.h | 22 ++++- drivers/crypto/caam/pdb.h | 33 +++++++ drivers/crypto/caam/pkc_desc.c | 19 ++++ 4 files changed, 291 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c index 912e41700522..57f399caa977 100644 --- a/drivers/crypto/caam/caampkc.c +++ b/drivers/crypto/caam/caampkc.c @@ -20,6 +20,8 @@ sizeof(struct rsa_priv_f1_pdb)) #define DESC_RSA_PRIV_F2_LEN (2 * CAAM_CMD_SZ + \ sizeof(struct rsa_priv_f2_pdb)) +#define DESC_RSA_PRIV_F3_LEN (2 * CAAM_CMD_SZ + \ + sizeof(struct rsa_priv_f3_pdb)) static void rsa_io_unmap(struct device *dev, struct rsa_edesc *edesc, struct akcipher_request *req) @@ -73,6 +75,25 @@ static void rsa_priv_f2_unmap(struct device *dev, struct rsa_edesc *edesc, dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_TO_DEVICE); } +static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc, + struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct rsa_priv_f3_pdb *pdb = &edesc->pdb.priv_f3; + size_t p_sz = key->p_sz; + size_t q_sz = key->p_sz; + + dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->dp_dma, p_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->dq_dma, q_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->c_dma, p_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE); + dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_TO_DEVICE); +} + /* RSA Job Completion handler */ static void rsa_pub_done(struct device *dev, u32 *desc, u32 err, void *context) { @@ -127,6 +148,24 @@ static void rsa_priv_f2_done(struct device *dev, u32 *desc, u32 err, akcipher_request_complete(req, err); } +static void rsa_priv_f3_done(struct device *dev, u32 *desc, u32 err, + void *context) +{ + struct akcipher_request *req = context; + struct rsa_edesc *edesc; + + if (err) + caam_jr_strstatus(dev, err); + + edesc = container_of(desc, struct rsa_edesc, hw_desc[0]); + + rsa_priv_f3_unmap(dev, edesc, req); + rsa_io_unmap(dev, edesc, req); + kfree(edesc); + + akcipher_request_complete(req, err); +} + static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req, size_t desclen) { @@ -370,6 +409,97 @@ unmap_d: return -ENOMEM; } +static int set_rsa_priv_f3_pdb(struct akcipher_request *req, + struct rsa_edesc *edesc) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct caam_rsa_key *key = &ctx->key; + struct device *dev = ctx->dev; + struct rsa_priv_f3_pdb *pdb = &edesc->pdb.priv_f3; + int sec4_sg_index = 0; + size_t p_sz = key->p_sz; + size_t q_sz = key->p_sz; + + pdb->p_dma = dma_map_single(dev, key->p, p_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->p_dma)) { + dev_err(dev, "Unable to map RSA prime factor p memory\n"); + return -ENOMEM; + } + + pdb->q_dma = dma_map_single(dev, key->q, q_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->q_dma)) { + dev_err(dev, "Unable to map RSA prime factor q memory\n"); + goto unmap_p; + } + + pdb->dp_dma = dma_map_single(dev, key->dp, p_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->dp_dma)) { + dev_err(dev, "Unable to map RSA exponent dp memory\n"); + goto unmap_q; + } + + pdb->dq_dma = dma_map_single(dev, key->dq, q_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->dq_dma)) { + dev_err(dev, "Unable to map RSA exponent dq memory\n"); + goto unmap_dp; + } + + pdb->c_dma = dma_map_single(dev, key->qinv, p_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->c_dma)) { + dev_err(dev, "Unable to map RSA CRT coefficient qinv memory\n"); + goto unmap_dq; + } + + pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->tmp1_dma)) { + dev_err(dev, "Unable to map RSA tmp1 memory\n"); + goto unmap_qinv; + } + + pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_TO_DEVICE); + if (dma_mapping_error(dev, pdb->tmp2_dma)) { + dev_err(dev, "Unable to map RSA tmp2 memory\n"); + goto unmap_tmp1; + } + + if (edesc->src_nents > 1) { + pdb->sgf |= RSA_PRIV_PDB_SGF_G; + pdb->g_dma = edesc->sec4_sg_dma; + sec4_sg_index += edesc->src_nents; + } else { + pdb->g_dma = sg_dma_address(req->src); + } + + if (edesc->dst_nents > 1) { + pdb->sgf |= RSA_PRIV_PDB_SGF_F; + pdb->f_dma = edesc->sec4_sg_dma + + sec4_sg_index * sizeof(struct sec4_sg_entry); + } else { + pdb->f_dma = sg_dma_address(req->dst); + } + + pdb->sgf |= key->n_sz; + pdb->p_q_len = (q_sz << RSA_PDB_Q_SHIFT) | p_sz; + + return 0; + +unmap_tmp1: + dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE); +unmap_qinv: + dma_unmap_single(dev, pdb->c_dma, p_sz, DMA_TO_DEVICE); +unmap_dq: + dma_unmap_single(dev, pdb->dq_dma, q_sz, DMA_TO_DEVICE); +unmap_dp: + dma_unmap_single(dev, pdb->dp_dma, p_sz, DMA_TO_DEVICE); +unmap_q: + dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE); +unmap_p: + dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE); + + return -ENOMEM; +} + static int caam_rsa_enc(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); @@ -479,6 +609,39 @@ init_fail: return ret; } +static int caam_rsa_dec_priv_f3(struct akcipher_request *req) +{ + struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); + struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm); + struct device *jrdev = ctx->dev; + struct rsa_edesc *edesc; + int ret; + + /* Allocate extended descriptor */ + edesc = rsa_edesc_alloc(req, DESC_RSA_PRIV_F3_LEN); + if (IS_ERR(edesc)) + return PTR_ERR(edesc); + + /* Set RSA Decrypt Protocol Data Block - Private Key Form #3 */ + ret = set_rsa_priv_f3_pdb(req, edesc); + if (ret) + goto init_fail; + + /* Initialize Job Descriptor */ + init_rsa_priv_f3_desc(edesc->hw_desc, &edesc->pdb.priv_f3); + + ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f3_done, req); + if (!ret) + return -EINPROGRESS; + + rsa_priv_f3_unmap(jrdev, edesc, req); + +init_fail: + rsa_io_unmap(jrdev, edesc, req); + kfree(edesc); + return ret; +} + static int caam_rsa_dec(struct akcipher_request *req) { struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req); @@ -495,7 +658,9 @@ static int caam_rsa_dec(struct akcipher_request *req) return -EOVERFLOW; } - if (key->priv_form == FORM2) + if (key->priv_form == FORM3) + ret = caam_rsa_dec_priv_f3(req); + else if (key->priv_form == FORM2) ret = caam_rsa_dec_priv_f2(req); else ret = caam_rsa_dec_priv_f1(req); @@ -508,6 +673,9 @@ static void caam_rsa_free_key(struct caam_rsa_key *key) kzfree(key->d); kzfree(key->p); kzfree(key->q); + kzfree(key->dp); + kzfree(key->dq); + kzfree(key->qinv); kzfree(key->tmp1); kzfree(key->tmp2); kfree(key->e); @@ -523,6 +691,34 @@ static void caam_rsa_drop_leading_zeros(const u8 **ptr, size_t *nbytes) } } +/** + * caam_read_rsa_crt - Used for reading dP, dQ, qInv CRT members. + * dP, dQ and qInv could decode to less than corresponding p, q length, as the + * BER-encoding requires that the minimum number of bytes be used to encode the + * integer. dP, dQ, qInv decoded values have to be zero-padded to appropriate + * length. + * + * @ptr : pointer to {dP, dQ, qInv} CRT member + * @nbytes: length in bytes of {dP, dQ, qInv} CRT member + * @dstlen: length in bytes of corresponding p or q prime factor + */ +static u8 *caam_read_rsa_crt(const u8 *ptr, size_t nbytes, size_t dstlen) +{ + u8 *dst; + + caam_rsa_drop_leading_zeros(&ptr, &nbytes); + if (!nbytes) + return NULL; + + dst = kzalloc(dstlen, GFP_DMA | GFP_KERNEL); + if (!dst) + return NULL; + + memcpy(dst + (dstlen - nbytes), ptr, nbytes); + + return dst; +} + /** * caam_read_raw_data - Read a raw byte stream as a positive integer. * The function skips buffer's leading zeros, copies the remained data @@ -629,8 +825,29 @@ static void caam_rsa_set_priv_key_form(struct caam_rsa_ctx *ctx, rsa_key->priv_form = FORM2; + rsa_key->dp = caam_read_rsa_crt(raw_key->dp, raw_key->dp_sz, p_sz); + if (!rsa_key->dp) + goto free_tmp2; + + rsa_key->dq = caam_read_rsa_crt(raw_key->dq, raw_key->dq_sz, q_sz); + if (!rsa_key->dq) + goto free_dp; + + rsa_key->qinv = caam_read_rsa_crt(raw_key->qinv, raw_key->qinv_sz, + q_sz); + if (!rsa_key->qinv) + goto free_dq; + + rsa_key->priv_form = FORM3; + return; +free_dq: + kzfree(rsa_key->dq); +free_dp: + kzfree(rsa_key->dp); +free_tmp2: + kzfree(rsa_key->tmp2); free_tmp1: kzfree(rsa_key->tmp1); free_q: diff --git a/drivers/crypto/caam/caampkc.h b/drivers/crypto/caam/caampkc.h index 48e408ceaf75..87ab75e9df43 100644 --- a/drivers/crypto/caam/caampkc.h +++ b/drivers/crypto/caam/caampkc.h @@ -14,7 +14,7 @@ /** * caam_priv_key_form - CAAM RSA private key representation - * CAAM RSA private key may have either of two forms. + * CAAM RSA private key may have either of three forms. * * 1. The first representation consists of the pair (n, d), where the * components have the following meanings: @@ -26,10 +26,22 @@ * p the first prime factor of the RSA modulus n * q the second prime factor of the RSA modulus n * d the RSA private exponent + * + * 3. The third representation consists of the quintuple (p, q, dP, dQ, qInv), + * where the components have the following meanings: + * p the first prime factor of the RSA modulus n + * q the second prime factor of the RSA modulus n + * dP the first factors's CRT exponent + * dQ the second factors's CRT exponent + * qInv the (first) CRT coefficient + * + * The benefit of using the third or the second key form is lower computational + * cost for the decryption and signature operations. */ enum caam_priv_key_form { FORM1, FORM2, + FORM3 }; /** @@ -39,6 +51,9 @@ enum caam_priv_key_form { * @d : RSA private exponent raw byte stream * @p : RSA prime factor p of RSA modulus n * @q : RSA prime factor q of RSA modulus n + * @dp : RSA CRT exponent of p + * @dp : RSA CRT exponent of q + * @qinv : RSA CRT coefficient * @tmp1 : CAAM uses this temporary buffer as internal state buffer. * It is assumed to be as long as p. * @tmp2 : CAAM uses this temporary buffer as internal state buffer. @@ -56,6 +71,9 @@ struct caam_rsa_key { u8 *d; u8 *p; u8 *q; + u8 *dp; + u8 *dq; + u8 *qinv; u8 *tmp1; u8 *tmp2; size_t n_sz; @@ -96,6 +114,7 @@ struct rsa_edesc { struct rsa_pub_pdb pub; struct rsa_priv_f1_pdb priv_f1; struct rsa_priv_f2_pdb priv_f2; + struct rsa_priv_f3_pdb priv_f3; } pdb; u32 hw_desc[]; }; @@ -104,5 +123,6 @@ struct rsa_edesc { void init_rsa_pub_desc(u32 *desc, struct rsa_pub_pdb *pdb); void init_rsa_priv_f1_desc(u32 *desc, struct rsa_priv_f1_pdb *pdb); void init_rsa_priv_f2_desc(u32 *desc, struct rsa_priv_f2_pdb *pdb); +void init_rsa_priv_f3_desc(u32 *desc, struct rsa_priv_f3_pdb *pdb); #endif diff --git a/drivers/crypto/caam/pdb.h b/drivers/crypto/caam/pdb.h index 8ea5e612927e..31e59963f4d2 100644 --- a/drivers/crypto/caam/pdb.h +++ b/drivers/crypto/caam/pdb.h @@ -493,6 +493,7 @@ struct dsa_verify_pdb { #define RSA_PRIV_KEY_FRM_1 0 #define RSA_PRIV_KEY_FRM_2 1 +#define RSA_PRIV_KEY_FRM_3 2 /** * RSA Encrypt Protocol Data Block @@ -554,4 +555,36 @@ struct rsa_priv_f2_pdb { u32 p_q_len; } __packed; +/** + * RSA Decrypt PDB - Private Key Form #3 + * This is the RSA Chinese Reminder Theorem (CRT) form for two prime factors of + * the RSA modulus. + * @sgf : scatter-gather field + * @g_dma : dma address of encrypted input data + * @f_dma : dma address of output data + * @c_dma : dma address of RSA CRT coefficient + * @p_dma : dma address of RSA prime factor p of RSA modulus n + * @q_dma : dma address of RSA prime factor q of RSA modulus n + * @dp_dma : dma address of RSA CRT exponent of RSA prime factor p + * @dp_dma : dma address of RSA CRT exponent of RSA prime factor q + * @tmp1_dma: dma address of temporary buffer. CAAM uses this temporary buffer + * as internal state buffer. It is assumed to be as long as p. + * @tmp2_dma: dma address of temporary buffer. CAAM uses this temporary buffer + * as internal state buffer. It is assumed to be as long as q. + * @p_q_len : length in bytes of first two prime factors of the RSA modulus n + */ +struct rsa_priv_f3_pdb { + u32 sgf; + dma_addr_t g_dma; + dma_addr_t f_dma; + dma_addr_t c_dma; + dma_addr_t p_dma; + dma_addr_t q_dma; + dma_addr_t dp_dma; + dma_addr_t dq_dma; + dma_addr_t tmp1_dma; + dma_addr_t tmp2_dma; + u32 p_q_len; +} __packed; + #endif diff --git a/drivers/crypto/caam/pkc_desc.c b/drivers/crypto/caam/pkc_desc.c index 9d03c7055ed7..9e2ce6fe2e43 100644 --- a/drivers/crypto/caam/pkc_desc.c +++ b/drivers/crypto/caam/pkc_desc.c @@ -51,3 +51,22 @@ void init_rsa_priv_f2_desc(u32 *desc, struct rsa_priv_f2_pdb *pdb) append_operation(desc, OP_TYPE_UNI_PROTOCOL | OP_PCLID_RSADEC_PRVKEY | RSA_PRIV_KEY_FRM_2); } + +/* Descriptor for RSA Private operation - Private Key Form #3 */ +void init_rsa_priv_f3_desc(u32 *desc, struct rsa_priv_f3_pdb *pdb) +{ + init_job_desc_pdb(desc, 0, sizeof(*pdb)); + append_cmd(desc, pdb->sgf); + append_ptr(desc, pdb->g_dma); + append_ptr(desc, pdb->f_dma); + append_ptr(desc, pdb->c_dma); + append_ptr(desc, pdb->p_dma); + append_ptr(desc, pdb->q_dma); + append_ptr(desc, pdb->dp_dma); + append_ptr(desc, pdb->dq_dma); + append_ptr(desc, pdb->tmp1_dma); + append_ptr(desc, pdb->tmp2_dma); + append_cmd(desc, pdb->p_q_len); + append_operation(desc, OP_TYPE_UNI_PROTOCOL | OP_PCLID_RSADEC_PRVKEY | + RSA_PRIV_KEY_FRM_3); +} -- cgit v1.2.3 From ac50b78b22cddb8a85b264b1f172c4239e826fd2 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Tue, 25 Apr 2017 08:59:44 -0500 Subject: crypto: ccp - Add a module author CC: # 4.9.x+ Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-dev.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c index 92d1c6959f08..b7504562715c 100644 --- a/drivers/crypto/ccp/ccp-dev.c +++ b/drivers/crypto/ccp/ccp-dev.c @@ -31,6 +31,7 @@ #include "ccp-dev.h" MODULE_AUTHOR("Tom Lendacky "); +MODULE_AUTHOR("Gary R Hook "); MODULE_LICENSE("GPL"); MODULE_VERSION("1.0.0"); MODULE_DESCRIPTION("AMD Cryptographic Coprocessor driver"); -- cgit v1.2.3 From f4857f4c2ee9aa4e2aacac1a845352b00197fb57 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 26 Apr 2017 17:11:32 +0100 Subject: crypto: arm64/sha - avoid non-standard inline asm tricks Replace the inline asm which exports struct offsets as ELF symbols with proper const variables exposing the same values. This works around an issue with Clang which does not interpret the "i" (or "I") constraints in the same way as GCC. Signed-off-by: Ard Biesheuvel Tested-by: Matthias Kaehlcke Signed-off-by: Herbert Xu --- arch/arm64/crypto/sha1-ce-core.S | 6 ++++-- arch/arm64/crypto/sha1-ce-glue.c | 11 +++-------- arch/arm64/crypto/sha2-ce-core.S | 6 ++++-- arch/arm64/crypto/sha2-ce-glue.c | 13 +++++-------- 4 files changed, 16 insertions(+), 20 deletions(-) diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S index c98e7e849f06..8550408735a0 100644 --- a/arch/arm64/crypto/sha1-ce-core.S +++ b/arch/arm64/crypto/sha1-ce-core.S @@ -82,7 +82,8 @@ ENTRY(sha1_ce_transform) ldr dgb, [x0, #16] /* load sha1_ce_state::finalize */ - ldr w4, [x0, #:lo12:sha1_ce_offsetof_finalize] + ldr_l w4, sha1_ce_offsetof_finalize, x4 + ldr w4, [x0, x4] /* load input */ 0: ld1 {v8.4s-v11.4s}, [x1], #64 @@ -132,7 +133,8 @@ CPU_LE( rev32 v11.16b, v11.16b ) * the padding is handled by the C code in that case. */ cbz x4, 3f - ldr x4, [x0, #:lo12:sha1_ce_offsetof_count] + ldr_l w4, sha1_ce_offsetof_count, x4 + ldr x4, [x0, x4] movi v9.2d, #0 mov x8, #0x80000000 movi v10.2d, #0 diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c index aefda9868627..ea319c055f5d 100644 --- a/arch/arm64/crypto/sha1-ce-glue.c +++ b/arch/arm64/crypto/sha1-ce-glue.c @@ -17,9 +17,6 @@ #include #include -#define ASM_EXPORT(sym, val) \ - asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val)); - MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); @@ -32,6 +29,9 @@ struct sha1_ce_state { asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src, int blocks); +const u32 sha1_ce_offsetof_count = offsetof(struct sha1_ce_state, sst.count); +const u32 sha1_ce_offsetof_finalize = offsetof(struct sha1_ce_state, finalize); + static int sha1_ce_update(struct shash_desc *desc, const u8 *data, unsigned int len) { @@ -52,11 +52,6 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data, struct sha1_ce_state *sctx = shash_desc_ctx(desc); bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE); - ASM_EXPORT(sha1_ce_offsetof_count, - offsetof(struct sha1_ce_state, sst.count)); - ASM_EXPORT(sha1_ce_offsetof_finalize, - offsetof(struct sha1_ce_state, finalize)); - /* * Allow the asm code to perform the finalization if there is no * partial data and the input is a round multiple of the block size. diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S index 01cfee066837..679c6c002f4f 100644 --- a/arch/arm64/crypto/sha2-ce-core.S +++ b/arch/arm64/crypto/sha2-ce-core.S @@ -88,7 +88,8 @@ ENTRY(sha2_ce_transform) ld1 {dgav.4s, dgbv.4s}, [x0] /* load sha256_ce_state::finalize */ - ldr w4, [x0, #:lo12:sha256_ce_offsetof_finalize] + ldr_l w4, sha256_ce_offsetof_finalize, x4 + ldr w4, [x0, x4] /* load input */ 0: ld1 {v16.4s-v19.4s}, [x1], #64 @@ -136,7 +137,8 @@ CPU_LE( rev32 v19.16b, v19.16b ) * the padding is handled by the C code in that case. */ cbz x4, 3f - ldr x4, [x0, #:lo12:sha256_ce_offsetof_count] + ldr_l w4, sha256_ce_offsetof_count, x4 + ldr x4, [x0, x4] movi v17.2d, #0 mov x8, #0x80000000 movi v18.2d, #0 diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c index 7cd587564a41..0ed9486f75dd 100644 --- a/arch/arm64/crypto/sha2-ce-glue.c +++ b/arch/arm64/crypto/sha2-ce-glue.c @@ -17,9 +17,6 @@ #include #include -#define ASM_EXPORT(sym, val) \ - asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val)); - MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions"); MODULE_AUTHOR("Ard Biesheuvel "); MODULE_LICENSE("GPL v2"); @@ -32,6 +29,11 @@ struct sha256_ce_state { asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src, int blocks); +const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state, + sst.count); +const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state, + finalize); + static int sha256_ce_update(struct shash_desc *desc, const u8 *data, unsigned int len) { @@ -52,11 +54,6 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data, struct sha256_ce_state *sctx = shash_desc_ctx(desc); bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE); - ASM_EXPORT(sha256_ce_offsetof_count, - offsetof(struct sha256_ce_state, sst.count)); - ASM_EXPORT(sha256_ce_offsetof_finalize, - offsetof(struct sha256_ce_state, finalize)); - /* * Allow the asm code to perform the finalization if there is no * partial data and the input is a round multiple of the block size. -- cgit v1.2.3 From 0487ccac2032872ea3bcfed08d4ecaefb55cf017 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 28 Apr 2017 18:11:56 +0200 Subject: crypto: aesni - make non-AVX AES-GCM work with any aadlen This is the first step to make the aesni AES-GCM implementation generic. The current code was written for rfc4106, so it handles only some specific sizes of associated data. Signed-off-by: Sabrina Dubroca Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 169 +++++++++++++++++++++++++++++--------- 1 file changed, 132 insertions(+), 37 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 3c465184ff8a..605726aaf0a2 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -89,6 +89,29 @@ SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 ALL_F: .octa 0xffffffffffffffffffffffffffffffff .octa 0x00000000000000000000000000000000 +.section .rodata +.align 16 +.type aad_shift_arr, @object +.size aad_shift_arr, 272 +aad_shift_arr: + .octa 0xffffffffffffffffffffffffffffffff + .octa 0xffffffffffffffffffffffffffffff0C + .octa 0xffffffffffffffffffffffffffff0D0C + .octa 0xffffffffffffffffffffffffff0E0D0C + .octa 0xffffffffffffffffffffffff0F0E0D0C + .octa 0xffffffffffffffffffffff0C0B0A0908 + .octa 0xffffffffffffffffffff0D0C0B0A0908 + .octa 0xffffffffffffffffff0E0D0C0B0A0908 + .octa 0xffffffffffffffff0F0E0D0C0B0A0908 + .octa 0xffffffffffffff0C0B0A090807060504 + .octa 0xffffffffffff0D0C0B0A090807060504 + .octa 0xffffffffff0E0D0C0B0A090807060504 + .octa 0xffffffff0F0E0D0C0B0A090807060504 + .octa 0xffffff0C0B0A09080706050403020100 + .octa 0xffff0D0C0B0A09080706050403020100 + .octa 0xff0E0D0C0B0A09080706050403020100 + .octa 0x0F0E0D0C0B0A09080706050403020100 + .text @@ -252,32 +275,66 @@ XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation mov arg8, %r12 # %r12 = aadLen mov %r12, %r11 pxor %xmm\i, %xmm\i + pxor \XMM2, \XMM2 -_get_AAD_loop\num_initial_blocks\operation: - movd (%r10), \TMP1 - pslldq $12, \TMP1 - psrldq $4, %xmm\i + cmp $16, %r11 + jl _get_AAD_rest8\num_initial_blocks\operation +_get_AAD_blocks\num_initial_blocks\operation: + movdqu (%r10), %xmm\i + PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data + pxor %xmm\i, \XMM2 + GHASH_MUL \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + add $16, %r10 + sub $16, %r12 + sub $16, %r11 + cmp $16, %r11 + jge _get_AAD_blocks\num_initial_blocks\operation + + movdqu \XMM2, %xmm\i + cmp $0, %r11 + je _get_AAD_done\num_initial_blocks\operation + + pxor %xmm\i,%xmm\i + + /* read the last <16B of AAD. since we have at least 4B of + data right after the AAD (the ICV, and maybe some CT), we can + read 4B/8B blocks safely, and then get rid of the extra stuff */ +_get_AAD_rest8\num_initial_blocks\operation: + cmp $4, %r11 + jle _get_AAD_rest4\num_initial_blocks\operation + movq (%r10), \TMP1 + add $8, %r10 + sub $8, %r11 + pslldq $8, \TMP1 + psrldq $8, %xmm\i pxor \TMP1, %xmm\i + jmp _get_AAD_rest8\num_initial_blocks\operation +_get_AAD_rest4\num_initial_blocks\operation: + cmp $0, %r11 + jle _get_AAD_rest0\num_initial_blocks\operation + mov (%r10), %eax + movq %rax, \TMP1 add $4, %r10 - sub $4, %r12 - jne _get_AAD_loop\num_initial_blocks\operation - - cmp $16, %r11 - je _get_AAD_loop2_done\num_initial_blocks\operation - - mov $16, %r12 -_get_AAD_loop2\num_initial_blocks\operation: + sub $4, %r10 + pslldq $12, \TMP1 psrldq $4, %xmm\i - sub $4, %r12 - cmp %r11, %r12 - jne _get_AAD_loop2\num_initial_blocks\operation - -_get_AAD_loop2_done\num_initial_blocks\operation: + pxor \TMP1, %xmm\i +_get_AAD_rest0\num_initial_blocks\operation: + /* finalize: shift out the extra bytes we read, and align + left. since pslldq can only shift by an immediate, we use + vpshufb and an array of shuffle masks */ + movq %r12, %r11 + salq $4, %r11 + movdqu aad_shift_arr(%r11), \TMP1 + PSHUFB_XMM \TMP1, %xmm\i +_get_AAD_rest_final\num_initial_blocks\operation: PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data + pxor \XMM2, %xmm\i + GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 +_get_AAD_done\num_initial_blocks\operation: xor %r11, %r11 # initialise the data pointer offset as zero - - # start AES for num_initial_blocks blocks + # start AES for num_initial_blocks blocks mov %arg5, %rax # %rax = *Y0 movdqu (%rax), \XMM0 # XMM0 = Y0 @@ -322,7 +379,7 @@ aes_loop_initial_dec\num_initial_blocks: # prepare plaintext/ciphertext for GHASH computation .endr .endif - GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + # apply GHASH on num_initial_blocks blocks .if \i == 5 @@ -477,28 +534,66 @@ XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation mov arg8, %r12 # %r12 = aadLen mov %r12, %r11 pxor %xmm\i, %xmm\i -_get_AAD_loop\num_initial_blocks\operation: - movd (%r10), \TMP1 - pslldq $12, \TMP1 - psrldq $4, %xmm\i + pxor \XMM2, \XMM2 + + cmp $16, %r11 + jl _get_AAD_rest8\num_initial_blocks\operation +_get_AAD_blocks\num_initial_blocks\operation: + movdqu (%r10), %xmm\i + PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data + pxor %xmm\i, \XMM2 + GHASH_MUL \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + add $16, %r10 + sub $16, %r12 + sub $16, %r11 + cmp $16, %r11 + jge _get_AAD_blocks\num_initial_blocks\operation + + movdqu \XMM2, %xmm\i + cmp $0, %r11 + je _get_AAD_done\num_initial_blocks\operation + + pxor %xmm\i,%xmm\i + + /* read the last <16B of AAD. since we have at least 4B of + data right after the AAD (the ICV, and maybe some PT), we can + read 4B/8B blocks safely, and then get rid of the extra stuff */ +_get_AAD_rest8\num_initial_blocks\operation: + cmp $4, %r11 + jle _get_AAD_rest4\num_initial_blocks\operation + movq (%r10), \TMP1 + add $8, %r10 + sub $8, %r11 + pslldq $8, \TMP1 + psrldq $8, %xmm\i pxor \TMP1, %xmm\i + jmp _get_AAD_rest8\num_initial_blocks\operation +_get_AAD_rest4\num_initial_blocks\operation: + cmp $0, %r11 + jle _get_AAD_rest0\num_initial_blocks\operation + mov (%r10), %eax + movq %rax, \TMP1 add $4, %r10 - sub $4, %r12 - jne _get_AAD_loop\num_initial_blocks\operation - cmp $16, %r11 - je _get_AAD_loop2_done\num_initial_blocks\operation - mov $16, %r12 -_get_AAD_loop2\num_initial_blocks\operation: + sub $4, %r10 + pslldq $12, \TMP1 psrldq $4, %xmm\i - sub $4, %r12 - cmp %r11, %r12 - jne _get_AAD_loop2\num_initial_blocks\operation -_get_AAD_loop2_done\num_initial_blocks\operation: + pxor \TMP1, %xmm\i +_get_AAD_rest0\num_initial_blocks\operation: + /* finalize: shift out the extra bytes we read, and align + left. since pslldq can only shift by an immediate, we use + vpshufb and an array of shuffle masks */ + movq %r12, %r11 + salq $4, %r11 + movdqu aad_shift_arr(%r11), \TMP1 + PSHUFB_XMM \TMP1, %xmm\i +_get_AAD_rest_final\num_initial_blocks\operation: PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data + pxor \XMM2, %xmm\i + GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 +_get_AAD_done\num_initial_blocks\operation: xor %r11, %r11 # initialise the data pointer offset as zero - - # start AES for num_initial_blocks blocks + # start AES for num_initial_blocks blocks mov %arg5, %rax # %rax = *Y0 movdqu (%rax), \XMM0 # XMM0 = Y0 @@ -543,7 +638,7 @@ aes_loop_initial_enc\num_initial_blocks: # prepare plaintext/ciphertext for GHASH computation .endr .endif - GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1 + # apply GHASH on num_initial_blocks blocks .if \i == 5 -- cgit v1.2.3 From 38d9deecab77b9eb38cb8bb5dfa43237c6710188 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 28 Apr 2017 18:11:57 +0200 Subject: crypto: aesni - make non-AVX AES-GCM work with all valid auth_tag_len Signed-off-by: Sabrina Dubroca Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 62 ++++++++++++++++++++++++++++++--------- 1 file changed, 48 insertions(+), 14 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 605726aaf0a2..16627fec80b2 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -1549,18 +1549,35 @@ _return_T_decrypt: mov arg10, %r11 # %r11 = auth_tag_len cmp $16, %r11 je _T_16_decrypt - cmp $12, %r11 - je _T_12_decrypt + cmp $8, %r11 + jl _T_4_decrypt _T_8_decrypt: MOVQ_R64_XMM %xmm0, %rax mov %rax, (%r10) - jmp _return_T_done_decrypt -_T_12_decrypt: - MOVQ_R64_XMM %xmm0, %rax - mov %rax, (%r10) + add $8, %r10 + sub $8, %r11 psrldq $8, %xmm0 + cmp $0, %r11 + je _return_T_done_decrypt +_T_4_decrypt: + movd %xmm0, %eax + mov %eax, (%r10) + add $4, %r10 + sub $4, %r11 + psrldq $4, %xmm0 + cmp $0, %r11 + je _return_T_done_decrypt +_T_123_decrypt: movd %xmm0, %eax - mov %eax, 8(%r10) + cmp $2, %r11 + jl _T_1_decrypt + mov %ax, (%r10) + cmp $2, %r11 + je _return_T_done_decrypt + add $2, %r10 + sar $16, %eax +_T_1_decrypt: + mov %al, (%r10) jmp _return_T_done_decrypt _T_16_decrypt: movdqu %xmm0, (%r10) @@ -1813,18 +1830,35 @@ _return_T_encrypt: mov arg10, %r11 # %r11 = auth_tag_len cmp $16, %r11 je _T_16_encrypt - cmp $12, %r11 - je _T_12_encrypt + cmp $8, %r11 + jl _T_4_encrypt _T_8_encrypt: MOVQ_R64_XMM %xmm0, %rax mov %rax, (%r10) - jmp _return_T_done_encrypt -_T_12_encrypt: - MOVQ_R64_XMM %xmm0, %rax - mov %rax, (%r10) + add $8, %r10 + sub $8, %r11 psrldq $8, %xmm0 + cmp $0, %r11 + je _return_T_done_encrypt +_T_4_encrypt: + movd %xmm0, %eax + mov %eax, (%r10) + add $4, %r10 + sub $4, %r11 + psrldq $4, %xmm0 + cmp $0, %r11 + je _return_T_done_encrypt +_T_123_encrypt: movd %xmm0, %eax - mov %eax, 8(%r10) + cmp $2, %r11 + jl _T_1_encrypt + mov %ax, (%r10) + cmp $2, %r11 + je _return_T_done_encrypt + add $2, %r10 + sar $16, %eax +_T_1_encrypt: + mov %al, (%r10) jmp _return_T_done_encrypt _T_16_encrypt: movdqu %xmm0, (%r10) -- cgit v1.2.3 From e10f9cb223511cfdc509f5a655f11ff6cbc5508c Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 28 Apr 2017 18:11:58 +0200 Subject: crypto: aesni - make AVX AES-GCM work with any aadlen This is the first step to make the aesni AES-GCM implementation generic. The current code was written for rfc4106, so it handles only some specific sizes of associated data. Signed-off-by: Sabrina Dubroca Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_avx-x86_64.S | 122 ++++++++++++++++++++++--------- 1 file changed, 88 insertions(+), 34 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S index d664382c6e56..a73117c84904 100644 --- a/arch/x86/crypto/aesni-intel_avx-x86_64.S +++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S @@ -155,6 +155,30 @@ SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 ALL_F: .octa 0xffffffffffffffffffffffffffffffff .octa 0x00000000000000000000000000000000 +.section .rodata +.align 16 +.type aad_shift_arr, @object +.size aad_shift_arr, 272 +aad_shift_arr: + .octa 0xffffffffffffffffffffffffffffffff + .octa 0xffffffffffffffffffffffffffffff0C + .octa 0xffffffffffffffffffffffffffff0D0C + .octa 0xffffffffffffffffffffffffff0E0D0C + .octa 0xffffffffffffffffffffffff0F0E0D0C + .octa 0xffffffffffffffffffffff0C0B0A0908 + .octa 0xffffffffffffffffffff0D0C0B0A0908 + .octa 0xffffffffffffffffff0E0D0C0B0A0908 + .octa 0xffffffffffffffff0F0E0D0C0B0A0908 + .octa 0xffffffffffffff0C0B0A090807060504 + .octa 0xffffffffffff0D0C0B0A090807060504 + .octa 0xffffffffff0E0D0C0B0A090807060504 + .octa 0xffffffff0F0E0D0C0B0A090807060504 + .octa 0xffffff0C0B0A09080706050403020100 + .octa 0xffff0D0C0B0A09080706050403020100 + .octa 0xff0E0D0C0B0A09080706050403020100 + .octa 0x0F0E0D0C0B0A09080706050403020100 + + .text @@ -372,41 +396,72 @@ VARIABLE_OFFSET = 16*8 .macro INITIAL_BLOCKS_AVX num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC i = (8-\num_initial_blocks) + j = 0 setreg - mov arg6, %r10 # r10 = AAD - mov arg7, %r12 # r12 = aadLen - - - mov %r12, %r11 - - vpxor reg_i, reg_i, reg_i -_get_AAD_loop\@: - vmovd (%r10), \T1 - vpslldq $12, \T1, \T1 - vpsrldq $4, reg_i, reg_i - vpxor \T1, reg_i, reg_i - - add $4, %r10 - sub $4, %r12 - jg _get_AAD_loop\@ - - - cmp $16, %r11 - je _get_AAD_loop2_done\@ - mov $16, %r12 - -_get_AAD_loop2\@: - vpsrldq $4, reg_i, reg_i - sub $4, %r12 - cmp %r11, %r12 - jg _get_AAD_loop2\@ - -_get_AAD_loop2_done\@: - - #byte-reflect the AAD data - vpshufb SHUF_MASK(%rip), reg_i, reg_i - + mov arg6, %r10 # r10 = AAD + mov arg7, %r12 # r12 = aadLen + + + mov %r12, %r11 + + vpxor reg_j, reg_j, reg_j + vpxor reg_i, reg_i, reg_i + cmp $16, %r11 + jl _get_AAD_rest8\@ +_get_AAD_blocks\@: + vmovdqu (%r10), reg_i + vpshufb SHUF_MASK(%rip), reg_i, reg_i + vpxor reg_i, reg_j, reg_j + GHASH_MUL_AVX reg_j, \T2, \T1, \T3, \T4, \T5, \T6 + add $16, %r10 + sub $16, %r12 + sub $16, %r11 + cmp $16, %r11 + jge _get_AAD_blocks\@ + vmovdqu reg_j, reg_i + cmp $0, %r11 + je _get_AAD_done\@ + + vpxor reg_i, reg_i, reg_i + + /* read the last <16B of AAD. since we have at least 4B of + data right after the AAD (the ICV, and maybe some CT), we can + read 4B/8B blocks safely, and then get rid of the extra stuff */ +_get_AAD_rest8\@: + cmp $4, %r11 + jle _get_AAD_rest4\@ + movq (%r10), \T1 + add $8, %r10 + sub $8, %r11 + vpslldq $8, \T1, \T1 + vpsrldq $8, reg_i, reg_i + vpxor \T1, reg_i, reg_i + jmp _get_AAD_rest8\@ +_get_AAD_rest4\@: + cmp $0, %r11 + jle _get_AAD_rest0\@ + mov (%r10), %eax + movq %rax, \T1 + add $4, %r10 + sub $4, %r11 + vpslldq $12, \T1, \T1 + vpsrldq $4, reg_i, reg_i + vpxor \T1, reg_i, reg_i +_get_AAD_rest0\@: + /* finalize: shift out the extra bytes we read, and align + left. since pslldq can only shift by an immediate, we use + vpshufb and an array of shuffle masks */ + movq %r12, %r11 + salq $4, %r11 + movdqu aad_shift_arr(%r11), \T1 + vpshufb \T1, reg_i, reg_i +_get_AAD_rest_final\@: + vpshufb SHUF_MASK(%rip), reg_i, reg_i + vpxor reg_j, reg_i, reg_i + GHASH_MUL_AVX reg_i, \T2, \T1, \T3, \T4, \T5, \T6 + +_get_AAD_done\@: # initialize the data pointer offset as zero xor %r11, %r11 @@ -480,7 +535,6 @@ _get_AAD_loop2_done\@: i = (8-\num_initial_blocks) j = (9-\num_initial_blocks) setreg - GHASH_MUL_AVX reg_i, \T2, \T1, \T3, \T4, \T5, \T6 .rep \num_initial_blocks vpxor reg_i, reg_j, reg_j -- cgit v1.2.3 From 0120af7795c4e526554e7d88f135dd1a028929cf Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 28 Apr 2017 18:11:59 +0200 Subject: crypto: aesni - make AVX AES-GCM work with all valid auth_tag_len Signed-off-by: Sabrina Dubroca Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_avx-x86_64.S | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S index a73117c84904..ee6283120f83 100644 --- a/arch/x86/crypto/aesni-intel_avx-x86_64.S +++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S @@ -1481,19 +1481,36 @@ _return_T\@: cmp $16, %r11 je _T_16\@ - cmp $12, %r11 - je _T_12\@ + cmp $8, %r11 + jl _T_4\@ _T_8\@: vmovq %xmm9, %rax mov %rax, (%r10) - jmp _return_T_done\@ -_T_12\@: - vmovq %xmm9, %rax - mov %rax, (%r10) + add $8, %r10 + sub $8, %r11 vpsrldq $8, %xmm9, %xmm9 + cmp $0, %r11 + je _return_T_done\@ +_T_4\@: vmovd %xmm9, %eax - mov %eax, 8(%r10) + mov %eax, (%r10) + add $4, %r10 + sub $4, %r11 + vpsrldq $4, %xmm9, %xmm9 + cmp $0, %r11 + je _return_T_done\@ +_T_123\@: + vmovd %xmm9, %eax + cmp $2, %r11 + jl _T_1\@ + mov %ax, (%r10) + cmp $2, %r11 + je _return_T_done\@ + add $2, %r10 + sar $16, %eax +_T_1\@: + mov %al, (%r10) jmp _return_T_done\@ _T_16\@: -- cgit v1.2.3 From 27352c45c56181b69aa6c2e1aad48f80dd5f9ca8 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 28 Apr 2017 18:12:00 +0200 Subject: crypto: aesni - make AVX2 AES-GCM work with any aadlen This is the first step to make the aesni AES-GCM implementation generic. The current code was written for rfc4106, so it handles only some specific sizes of associated data. Signed-off-by: Sabrina Dubroca Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_avx-x86_64.S | 85 ++++++++++++++++++++++---------- 1 file changed, 58 insertions(+), 27 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S index ee6283120f83..7230808a7cef 100644 --- a/arch/x86/crypto/aesni-intel_avx-x86_64.S +++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S @@ -1702,41 +1702,73 @@ ENDPROC(aesni_gcm_dec_avx_gen2) .macro INITIAL_BLOCKS_AVX2 num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER i = (8-\num_initial_blocks) + j = 0 setreg - mov arg6, %r10 # r10 = AAD - mov arg7, %r12 # r12 = aadLen - - - mov %r12, %r11 - - vpxor reg_i, reg_i, reg_i -_get_AAD_loop\@: - vmovd (%r10), \T1 - vpslldq $12, \T1, \T1 - vpsrldq $4, reg_i, reg_i - vpxor \T1, reg_i, reg_i + mov arg6, %r10 # r10 = AAD + mov arg7, %r12 # r12 = aadLen - add $4, %r10 - sub $4, %r12 - jg _get_AAD_loop\@ + mov %r12, %r11 - cmp $16, %r11 - je _get_AAD_loop2_done\@ - mov $16, %r12 + vpxor reg_j, reg_j, reg_j + vpxor reg_i, reg_i, reg_i -_get_AAD_loop2\@: - vpsrldq $4, reg_i, reg_i - sub $4, %r12 - cmp %r11, %r12 - jg _get_AAD_loop2\@ + cmp $16, %r11 + jl _get_AAD_rest8\@ +_get_AAD_blocks\@: + vmovdqu (%r10), reg_i + vpshufb SHUF_MASK(%rip), reg_i, reg_i + vpxor reg_i, reg_j, reg_j + GHASH_MUL_AVX2 reg_j, \T2, \T1, \T3, \T4, \T5, \T6 + add $16, %r10 + sub $16, %r12 + sub $16, %r11 + cmp $16, %r11 + jge _get_AAD_blocks\@ + vmovdqu reg_j, reg_i + cmp $0, %r11 + je _get_AAD_done\@ -_get_AAD_loop2_done\@: + vpxor reg_i, reg_i, reg_i - #byte-reflect the AAD data - vpshufb SHUF_MASK(%rip), reg_i, reg_i + /* read the last <16B of AAD. since we have at least 4B of + data right after the AAD (the ICV, and maybe some CT), we can + read 4B/8B blocks safely, and then get rid of the extra stuff */ +_get_AAD_rest8\@: + cmp $4, %r11 + jle _get_AAD_rest4\@ + movq (%r10), \T1 + add $8, %r10 + sub $8, %r11 + vpslldq $8, \T1, \T1 + vpsrldq $8, reg_i, reg_i + vpxor \T1, reg_i, reg_i + jmp _get_AAD_rest8\@ +_get_AAD_rest4\@: + cmp $0, %r11 + jle _get_AAD_rest0\@ + mov (%r10), %eax + movq %rax, \T1 + add $4, %r10 + sub $4, %r11 + vpslldq $12, \T1, \T1 + vpsrldq $4, reg_i, reg_i + vpxor \T1, reg_i, reg_i +_get_AAD_rest0\@: + /* finalize: shift out the extra bytes we read, and align + left. since pslldq can only shift by an immediate, we use + vpshufb and an array of shuffle masks */ + movq %r12, %r11 + salq $4, %r11 + movdqu aad_shift_arr(%r11), \T1 + vpshufb \T1, reg_i, reg_i +_get_AAD_rest_final\@: + vpshufb SHUF_MASK(%rip), reg_i, reg_i + vpxor reg_j, reg_i, reg_i + GHASH_MUL_AVX2 reg_i, \T2, \T1, \T3, \T4, \T5, \T6 +_get_AAD_done\@: # initialize the data pointer offset as zero xor %r11, %r11 @@ -1811,7 +1843,6 @@ _get_AAD_loop2_done\@: i = (8-\num_initial_blocks) j = (9-\num_initial_blocks) setreg - GHASH_MUL_AVX2 reg_i, \T2, \T1, \T3, \T4, \T5, \T6 .rep \num_initial_blocks vpxor reg_i, reg_j, reg_j -- cgit v1.2.3 From 2df7e813627622daf1168e30cd2b29467b7e14e5 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 28 Apr 2017 18:12:01 +0200 Subject: crypto: aesni - make AVX2 AES-GCM work with all valid auth_tag_len Signed-off-by: Sabrina Dubroca Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_avx-x86_64.S | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S index 7230808a7cef..faecb1518bf8 100644 --- a/arch/x86/crypto/aesni-intel_avx-x86_64.S +++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S @@ -2804,19 +2804,36 @@ _return_T\@: cmp $16, %r11 je _T_16\@ - cmp $12, %r11 - je _T_12\@ + cmp $8, %r11 + jl _T_4\@ _T_8\@: vmovq %xmm9, %rax mov %rax, (%r10) - jmp _return_T_done\@ -_T_12\@: - vmovq %xmm9, %rax - mov %rax, (%r10) + add $8, %r10 + sub $8, %r11 vpsrldq $8, %xmm9, %xmm9 + cmp $0, %r11 + je _return_T_done\@ +_T_4\@: vmovd %xmm9, %eax - mov %eax, 8(%r10) + mov %eax, (%r10) + add $4, %r10 + sub $4, %r11 + vpsrldq $4, %xmm9, %xmm9 + cmp $0, %r11 + je _return_T_done\@ +_T_123\@: + vmovd %xmm9, %eax + cmp $2, %r11 + jl _T_1\@ + mov %ax, (%r10) + cmp $2, %r11 + je _return_T_done\@ + add $2, %r10 + sar $16, %eax +_T_1\@: + mov %al, (%r10) jmp _return_T_done\@ _T_16\@: -- cgit v1.2.3 From cce2ea8d90fec72ed954830e73f5d4995cc79193 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 28 Apr 2017 18:12:02 +0200 Subject: crypto: aesni - add generic gcm(aes) Now that the asm side of things can support all the valid lengths of ICV and all lengths of associated data, provide the glue code to expose a generic gcm(aes) crypto algorithm. Signed-off-by: Sabrina Dubroca Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 208 ++++++++++++++++++++++++++++--------- 1 file changed, 158 insertions(+), 50 deletions(-) diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 93de8ea51548..4a55cdcdc008 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -61,6 +61,11 @@ struct aesni_rfc4106_gcm_ctx { u8 nonce[4]; }; +struct generic_gcmaes_ctx { + u8 hash_subkey[16] AESNI_ALIGN_ATTR; + struct crypto_aes_ctx aes_key_expanded AESNI_ALIGN_ATTR; +}; + struct aesni_xts_ctx { u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR; u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR; @@ -102,13 +107,11 @@ asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out, * u8 *out, Ciphertext output. Encrypt in-place is allowed. * const u8 *in, Plaintext input * unsigned long plaintext_len, Length of data in bytes for encryption. - * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association) - * concatenated with 8 byte Initialisation Vector (from IPSec ESP - * Payload) concatenated with 0x00000001. 16-byte aligned pointer. + * u8 *iv, Pre-counter block j0: 12 byte IV concatenated with 0x00000001. + * 16-byte aligned pointer. * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. * const u8 *aad, Additional Authentication Data (AAD) - * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this - * is going to be 8 or 12 bytes + * unsigned long aad_len, Length of AAD in bytes. * u8 *auth_tag, Authenticated Tag output. * unsigned long auth_tag_len), Authenticated Tag Length in bytes. * Valid values are 16 (most likely), 12 or 8. @@ -123,9 +126,8 @@ asmlinkage void aesni_gcm_enc(void *ctx, u8 *out, * u8 *out, Plaintext output. Decrypt in-place is allowed. * const u8 *in, Ciphertext input * unsigned long ciphertext_len, Length of data in bytes for decryption. - * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association) - * concatenated with 8 byte Initialisation Vector (from IPSec ESP - * Payload) concatenated with 0x00000001. 16-byte aligned pointer. + * u8 *iv, Pre-counter block j0: 12 byte IV concatenated with 0x00000001. + * 16-byte aligned pointer. * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary. * const u8 *aad, Additional Authentication Data (AAD) * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this is going @@ -275,6 +277,16 @@ aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm) align = 1; return PTR_ALIGN(crypto_aead_ctx(tfm), align); } + +static inline struct +generic_gcmaes_ctx *generic_gcmaes_ctx_get(struct crypto_aead *tfm) +{ + unsigned long align = AESNI_ALIGN; + + if (align <= crypto_tfm_ctx_alignment()) + align = 1; + return PTR_ALIGN(crypto_aead_ctx(tfm), align); +} #endif static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx) @@ -712,32 +724,34 @@ static int rfc4106_set_authsize(struct crypto_aead *parent, return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); } -static int helper_rfc4106_encrypt(struct aead_request *req) +static int generic_gcmaes_set_authsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + switch (authsize) { + case 4: + case 8: + case 12: + case 13: + case 14: + case 15: + case 16: + break; + default: + return -EINVAL; + } + + return 0; +} + +static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen, + u8 *hash_subkey, u8 *iv, void *aes_ctx) { u8 one_entry_in_sg = 0; u8 *src, *dst, *assoc; - __be32 counter = cpu_to_be32(1); struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); - void *aes_ctx = &(ctx->aes_key_expanded); unsigned long auth_tag_len = crypto_aead_authsize(tfm); - u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); struct scatter_walk src_sg_walk; struct scatter_walk dst_sg_walk = {}; - unsigned int i; - - /* Assuming we are supporting rfc4106 64-bit extended */ - /* sequence numbers We need to have the AAD length equal */ - /* to 16 or 20 bytes */ - if (unlikely(req->assoclen != 16 && req->assoclen != 20)) - return -EINVAL; - - /* IV below built */ - for (i = 0; i < 4; i++) - *(iv+i) = ctx->nonce[i]; - for (i = 0; i < 8; i++) - *(iv+4+i) = req->iv[i]; - *((__be32 *)(iv+12)) = counter; if (sg_is_last(req->src) && (!PageHighMem(sg_page(req->src)) || @@ -768,7 +782,7 @@ static int helper_rfc4106_encrypt(struct aead_request *req) kernel_fpu_begin(); aesni_gcm_enc_tfm(aes_ctx, dst, src, req->cryptlen, iv, - ctx->hash_subkey, assoc, req->assoclen - 8, + hash_subkey, assoc, assoclen, dst + req->cryptlen, auth_tag_len); kernel_fpu_end(); @@ -791,37 +805,20 @@ static int helper_rfc4106_encrypt(struct aead_request *req) return 0; } -static int helper_rfc4106_decrypt(struct aead_request *req) +static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen, + u8 *hash_subkey, u8 *iv, void *aes_ctx) { u8 one_entry_in_sg = 0; u8 *src, *dst, *assoc; unsigned long tempCipherLen = 0; - __be32 counter = cpu_to_be32(1); - int retval = 0; struct crypto_aead *tfm = crypto_aead_reqtfm(req); - struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); - void *aes_ctx = &(ctx->aes_key_expanded); unsigned long auth_tag_len = crypto_aead_authsize(tfm); - u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); u8 authTag[16]; struct scatter_walk src_sg_walk; struct scatter_walk dst_sg_walk = {}; - unsigned int i; - - if (unlikely(req->assoclen != 16 && req->assoclen != 20)) - return -EINVAL; - - /* Assuming we are supporting rfc4106 64-bit extended */ - /* sequence numbers We need to have the AAD length */ - /* equal to 16 or 20 bytes */ + int retval = 0; tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len); - /* IV below built */ - for (i = 0; i < 4; i++) - *(iv+i) = ctx->nonce[i]; - for (i = 0; i < 8; i++) - *(iv+4+i) = req->iv[i]; - *((__be32 *)(iv+12)) = counter; if (sg_is_last(req->src) && (!PageHighMem(sg_page(req->src)) || @@ -838,7 +835,6 @@ static int helper_rfc4106_decrypt(struct aead_request *req) scatterwalk_start(&dst_sg_walk, req->dst); dst = scatterwalk_map(&dst_sg_walk) + req->assoclen; } - } else { /* Allocate memory for src, dst, assoc */ assoc = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC); @@ -850,9 +846,10 @@ static int helper_rfc4106_decrypt(struct aead_request *req) dst = src; } + kernel_fpu_begin(); aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv, - ctx->hash_subkey, assoc, req->assoclen - 8, + hash_subkey, assoc, assoclen, authTag, auth_tag_len); kernel_fpu_end(); @@ -875,6 +872,60 @@ static int helper_rfc4106_decrypt(struct aead_request *req) kfree(assoc); } return retval; + +} + +static int helper_rfc4106_encrypt(struct aead_request *req) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); + void *aes_ctx = &(ctx->aes_key_expanded); + u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); + unsigned int i; + __be32 counter = cpu_to_be32(1); + + /* Assuming we are supporting rfc4106 64-bit extended */ + /* sequence numbers We need to have the AAD length equal */ + /* to 16 or 20 bytes */ + if (unlikely(req->assoclen != 16 && req->assoclen != 20)) + return -EINVAL; + + /* IV below built */ + for (i = 0; i < 4; i++) + *(iv+i) = ctx->nonce[i]; + for (i = 0; i < 8; i++) + *(iv+4+i) = req->iv[i]; + *((__be32 *)(iv+12)) = counter; + + return gcmaes_encrypt(req, req->assoclen - 8, ctx->hash_subkey, iv, + aes_ctx); +} + +static int helper_rfc4106_decrypt(struct aead_request *req) +{ + __be32 counter = cpu_to_be32(1); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); + void *aes_ctx = &(ctx->aes_key_expanded); + u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); + unsigned int i; + + if (unlikely(req->assoclen != 16 && req->assoclen != 20)) + return -EINVAL; + + /* Assuming we are supporting rfc4106 64-bit extended */ + /* sequence numbers We need to have the AAD length */ + /* equal to 16 or 20 bytes */ + + /* IV below built */ + for (i = 0; i < 4; i++) + *(iv+i) = ctx->nonce[i]; + for (i = 0; i < 8; i++) + *(iv+4+i) = req->iv[i]; + *((__be32 *)(iv+12)) = counter; + + return gcmaes_decrypt(req, req->assoclen - 8, ctx->hash_subkey, iv, + aes_ctx); } static int rfc4106_encrypt(struct aead_request *req) @@ -1035,6 +1086,46 @@ struct { }; #ifdef CONFIG_X86_64 +static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key, + unsigned int key_len) +{ + struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(aead); + + return aes_set_key_common(crypto_aead_tfm(aead), + &ctx->aes_key_expanded, key, key_len) ?: + rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len); +} + +static int generic_gcmaes_encrypt(struct aead_request *req) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm); + void *aes_ctx = &(ctx->aes_key_expanded); + u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); + __be32 counter = cpu_to_be32(1); + + memcpy(iv, req->iv, 12); + *((__be32 *)(iv+12)) = counter; + + return gcmaes_encrypt(req, req->assoclen, ctx->hash_subkey, iv, + aes_ctx); +} + +static int generic_gcmaes_decrypt(struct aead_request *req) +{ + __be32 counter = cpu_to_be32(1); + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm); + void *aes_ctx = &(ctx->aes_key_expanded); + u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN))); + + memcpy(iv, req->iv, 12); + *((__be32 *)(iv+12)) = counter; + + return gcmaes_decrypt(req, req->assoclen, ctx->hash_subkey, iv, + aes_ctx); +} + static struct aead_alg aesni_aead_algs[] = { { .setkey = common_rfc4106_set_key, .setauthsize = common_rfc4106_set_authsize, @@ -1069,6 +1160,23 @@ static struct aead_alg aesni_aead_algs[] = { { .cra_ctxsize = sizeof(struct cryptd_aead *), .cra_module = THIS_MODULE, }, +}, { + .setkey = generic_gcmaes_set_key, + .setauthsize = generic_gcmaes_set_authsize, + .encrypt = generic_gcmaes_encrypt, + .decrypt = generic_gcmaes_decrypt, + .ivsize = 12, + .maxauthsize = 16, + .base = { + .cra_name = "gcm(aes)", + .cra_driver_name = "generic-gcm-aesni", + .cra_priority = 400, + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct generic_gcmaes_ctx), + .cra_alignmask = AESNI_ALIGN - 1, + .cra_module = THIS_MODULE, + }, } }; #else static struct aead_alg aesni_aead_algs[0]; -- cgit v1.2.3 From 986130bf3701e9875a820e06ed5c6ae0a851900b Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Mon, 15 May 2017 13:52:03 +0530 Subject: hwrng: omap3-rom - Handle return value of clk_prepare_enable Here, Clock enable can failed. So adding an error check for clk_prepare_enable. Signed-off-by: Arvind Yadav Signed-off-by: Herbert Xu --- drivers/char/hw_random/omap3-rom-rng.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c index 37a58d78aab3..38b719017186 100644 --- a/drivers/char/hw_random/omap3-rom-rng.c +++ b/drivers/char/hw_random/omap3-rom-rng.c @@ -53,7 +53,10 @@ static int omap3_rom_rng_get_random(void *buf, unsigned int count) cancel_delayed_work_sync(&idle_work); if (rng_idle) { - clk_prepare_enable(rng_clk); + r = clk_prepare_enable(rng_clk); + if (r) + return r; + r = omap3_rom_rng_call(0, 0, RNG_GEN_PRNG_HW_INIT); if (r != 0) { clk_disable_unprepare(rng_clk); @@ -88,6 +91,8 @@ static struct hwrng omap3_rom_rng_ops = { static int omap3_rom_rng_probe(struct platform_device *pdev) { + int ret = 0; + pr_info("initializing\n"); omap3_rom_rng_call = pdev->dev.platform_data; @@ -104,7 +109,9 @@ static int omap3_rom_rng_probe(struct platform_device *pdev) } /* Leave the RNG in reset state. */ - clk_prepare_enable(rng_clk); + ret = clk_prepare_enable(rng_clk); + if (ret) + return ret; omap3_rom_rng_idle(0); return hwrng_register(&omap3_rom_rng_ops); -- cgit v1.2.3 From 5f052c9c631d360d6ec6202714b9d6529a21117a Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Tue, 16 May 2017 13:57:41 +0530 Subject: crypto: img-hash - Handle return value of clk_prepare_enable Here, Clock enable can failed. So adding an error check for clk_prepare_enable. Signed-off-by: Arvind Yadav Signed-off-by: Herbert Xu --- drivers/crypto/img-hash.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c index 9b07f3d88feb..0c6a917a9ab8 100644 --- a/drivers/crypto/img-hash.c +++ b/drivers/crypto/img-hash.c @@ -1088,9 +1088,17 @@ static int img_hash_suspend(struct device *dev) static int img_hash_resume(struct device *dev) { struct img_hash_dev *hdev = dev_get_drvdata(dev); + int ret; - clk_prepare_enable(hdev->hash_clk); - clk_prepare_enable(hdev->sys_clk); + ret = clk_prepare_enable(hdev->hash_clk); + if (ret) + return ret; + + ret = clk_prepare_enable(hdev->sys_clk); + if (ret) { + clk_disable_unprepare(hdev->hash_clk); + return ret; + } return 0; } -- cgit v1.2.3 From 248c65056ccca7d51db78e6821ff89a020e168bb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 16 May 2017 16:21:05 +0200 Subject: crypto: qat - use pcie_flr instead of duplicating it Signed-off-by: Christoph Hellwig Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/adf_aer.c | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c index 2839fccdd84b..d3e25c37dc33 100644 --- a/drivers/crypto/qat/qat_common/adf_aer.c +++ b/drivers/crypto/qat/qat_common/adf_aer.c @@ -109,20 +109,7 @@ EXPORT_SYMBOL_GPL(adf_reset_sbr); void adf_reset_flr(struct adf_accel_dev *accel_dev) { - struct pci_dev *pdev = accel_to_pci_dev(accel_dev); - u16 control = 0; - int pos = 0; - - dev_info(&GET_DEV(accel_dev), "Function level reset\n"); - pos = pci_pcie_cap(pdev); - if (!pos) { - dev_err(&GET_DEV(accel_dev), "Restart device failed\n"); - return; - } - pci_read_config_word(pdev, pos + PCI_EXP_DEVCTL, &control); - control |= PCI_EXP_DEVCTL_BCR_FLR; - pci_write_config_word(pdev, pos + PCI_EXP_DEVCTL, control); - msleep(100); + pcie_flr(accel_to_pci_dev(accel_dev)); } EXPORT_SYMBOL_GPL(adf_reset_flr); -- cgit v1.2.3 From 9417cd1c517a89ccfb5cc3d3f2a0e50fc64f3445 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 16 May 2017 21:03:08 -0700 Subject: crypto: x86/aes - Don't use %rbp as temporary register When using the "aes-asm" implementation of AES (*not* the AES-NI implementation) on an x86_64, v4.12-rc1 kernel with lockdep enabled, the following warning was reported, along with a long unwinder dump: WARNING: kernel stack regs at ffffc90000643558 in kworker/u4:2:155 has bad 'bp' value 000000000000001c The problem is that aes_enc_block() and aes_dec_block() use %rbp as a temporary register, which breaks stack traces if an interrupt occurs. Fix this by replacing %rbp with %r9, which was being used to hold the saved value of %rbp. This required rearranging the AES round macro slightly since %r9d cannot be used as the target of a move from %ah-%dh. Performance is essentially unchanged --- actually about 0.2% faster than before. Interestingly, I also measured aes-generic as being nearly 7% faster than aes-asm, so perhaps aes-asm has outlived its usefulness... Signed-off-by: Eric Biggers Reviewed-by: Josh Poimboeuf Signed-off-by: Herbert Xu --- arch/x86/crypto/aes-x86_64-asm_64.S | 47 +++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S index 910565547163..8739cf7795de 100644 --- a/arch/x86/crypto/aes-x86_64-asm_64.S +++ b/arch/x86/crypto/aes-x86_64-asm_64.S @@ -42,17 +42,15 @@ #define R5E %esi #define R6 %rdi #define R6E %edi -#define R7 %rbp -#define R7E %ebp +#define R7 %r9 /* don't use %rbp; it breaks stack traces */ +#define R7E %r9d #define R8 %r8 -#define R9 %r9 #define R10 %r10 #define R11 %r11 -#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \ +#define prologue(FUNC,KEY,B128,B192,r1,r2,r5,r6,r7,r8,r9,r10,r11) \ ENTRY(FUNC); \ movq r1,r2; \ - movq r3,r4; \ leaq KEY+48(r8),r9; \ movq r10,r11; \ movl (r7),r5 ## E; \ @@ -70,9 +68,8 @@ je B192; \ leaq 32(r9),r9; -#define epilogue(FUNC,r1,r2,r3,r4,r5,r6,r7,r8,r9) \ +#define epilogue(FUNC,r1,r2,r5,r6,r7,r8,r9) \ movq r1,r2; \ - movq r3,r4; \ movl r5 ## E,(r9); \ movl r6 ## E,4(r9); \ movl r7 ## E,8(r9); \ @@ -88,12 +85,12 @@ movl TAB(,r6,4),r6 ## E; \ roll $16,r2 ## E; \ shrl $16,r4 ## E; \ - movzbl r4 ## H,r7 ## E; \ - movzbl r4 ## L,r4 ## E; \ + movzbl r4 ## L,r7 ## E; \ + movzbl r4 ## H,r4 ## E; \ xorl OFFSET(r8),ra ## E; \ xorl OFFSET+4(r8),rb ## E; \ - xorl TAB+3072(,r7,4),r5 ## E;\ - xorl TAB+2048(,r4,4),r6 ## E;\ + xorl TAB+3072(,r4,4),r5 ## E;\ + xorl TAB+2048(,r7,4),r6 ## E;\ movzbl r1 ## L,r7 ## E; \ movzbl r1 ## H,r4 ## E; \ movl TAB+1024(,r4,4),r4 ## E;\ @@ -101,19 +98,19 @@ roll $16,r1 ## E; \ shrl $16,r3 ## E; \ xorl TAB(,r7,4),r5 ## E; \ - movzbl r3 ## H,r7 ## E; \ - movzbl r3 ## L,r3 ## E; \ - xorl TAB+3072(,r7,4),r4 ## E;\ - xorl TAB+2048(,r3,4),r5 ## E;\ - movzbl r1 ## H,r7 ## E; \ - movzbl r1 ## L,r3 ## E; \ + movzbl r3 ## L,r7 ## E; \ + movzbl r3 ## H,r3 ## E; \ + xorl TAB+3072(,r3,4),r4 ## E;\ + xorl TAB+2048(,r7,4),r5 ## E;\ + movzbl r1 ## L,r7 ## E; \ + movzbl r1 ## H,r3 ## E; \ shrl $16,r1 ## E; \ - xorl TAB+3072(,r7,4),r6 ## E;\ - movl TAB+2048(,r3,4),r3 ## E;\ - movzbl r1 ## H,r7 ## E; \ - movzbl r1 ## L,r1 ## E; \ - xorl TAB+1024(,r7,4),r6 ## E;\ - xorl TAB(,r1,4),r3 ## E; \ + xorl TAB+3072(,r3,4),r6 ## E;\ + movl TAB+2048(,r7,4),r3 ## E;\ + movzbl r1 ## L,r7 ## E; \ + movzbl r1 ## H,r1 ## E; \ + xorl TAB+1024(,r1,4),r6 ## E;\ + xorl TAB(,r7,4),r3 ## E; \ movzbl r2 ## H,r1 ## E; \ movzbl r2 ## L,r7 ## E; \ shrl $16,r2 ## E; \ @@ -131,9 +128,9 @@ movl r4 ## E,r2 ## E; #define entry(FUNC,KEY,B128,B192) \ - prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11) + prologue(FUNC,KEY,B128,B192,R2,R8,R1,R3,R4,R6,R10,R5,R11) -#define return(FUNC) epilogue(FUNC,R8,R2,R9,R7,R5,R6,R3,R4,R11) +#define return(FUNC) epilogue(FUNC,R8,R2,R5,R6,R3,R4,R11) #define encrypt_round(TAB,OFFSET) \ round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \ -- cgit v1.2.3 From 03d7db5654aefb78bf2ded62e2187833fda2a6cc Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Fri, 19 May 2017 08:53:23 +0200 Subject: crypto: hmac - add hmac IPAD/OPAD constant Many HMAC users directly use directly 0x36/0x5c values. It's better with crypto to use a name instead of directly some crypto constant. This patch simply add HMAC_IPAD_VALUE/HMAC_OPAD_VALUE defines in a new include file "crypto/hmac.h" and use them in crypto/hmac.c Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- crypto/hmac.c | 5 +++-- include/crypto/hmac.h | 7 +++++++ 2 files changed, 10 insertions(+), 2 deletions(-) create mode 100644 include/crypto/hmac.h diff --git a/crypto/hmac.c b/crypto/hmac.c index 72e38c098bb3..92871dc2a63e 100644 --- a/crypto/hmac.c +++ b/crypto/hmac.c @@ -16,6 +16,7 @@ * */ +#include #include #include #include @@ -74,8 +75,8 @@ static int hmac_setkey(struct crypto_shash *parent, memcpy(opad, ipad, bs); for (i = 0; i < bs; i++) { - ipad[i] ^= 0x36; - opad[i] ^= 0x5c; + ipad[i] ^= HMAC_IPAD_VALUE; + opad[i] ^= HMAC_OPAD_VALUE; } return crypto_shash_init(shash) ?: diff --git a/include/crypto/hmac.h b/include/crypto/hmac.h new file mode 100644 index 000000000000..ef09f7938204 --- /dev/null +++ b/include/crypto/hmac.h @@ -0,0 +1,7 @@ +#ifndef _CRYPTO_HMAC_H +#define _CRYPTO_HMAC_H + +#define HMAC_IPAD_VALUE 0x36 +#define HMAC_OPAD_VALUE 0x5c + +#endif /* _CRYPTO_HMAC_H */ -- cgit v1.2.3 From 1126d47db0cf40a5f31060095507ea5416a43344 Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Fri, 19 May 2017 08:53:24 +0200 Subject: crypto: brcm - Use IPAD/OPAD constant This patch simply replace all occurrence of HMAC IPAD/OPAD value by their define. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/bcm/cipher.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c index cc0d5b98006e..61393dc70b0b 100644 --- a/drivers/crypto/bcm/cipher.c +++ b/drivers/crypto/bcm/cipher.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -2510,8 +2511,8 @@ static int ahash_hmac_setkey(struct crypto_ahash *ahash, const u8 *key, memcpy(ctx->opad, ctx->ipad, blocksize); for (index = 0; index < blocksize; index++) { - ctx->ipad[index] ^= 0x36; - ctx->opad[index] ^= 0x5c; + ctx->ipad[index] ^= HMAC_IPAD_VALUE; + ctx->opad[index] ^= HMAC_OPAD_VALUE; } flow_dump(" ipad: ", ctx->ipad, blocksize); -- cgit v1.2.3 From bb9634df0526593f60294f6f3c90787f7d4df7b0 Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Fri, 19 May 2017 08:53:25 +0200 Subject: crypto: ixp4xx - Use IPAD/OPAD constant This patch simply replace all occurrence of HMAC IPAD/OPAD value by their define. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/ixp4xx_crypto.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c index 771dd26c7076..427cbe012729 100644 --- a/drivers/crypto/ixp4xx_crypto.c +++ b/drivers/crypto/ixp4xx_crypto.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -90,8 +91,6 @@ #define CTL_FLAG_PERFORM_AEAD 0x0008 #define CTL_FLAG_MASK 0x000f -#define HMAC_IPAD_VALUE 0x36 -#define HMAC_OPAD_VALUE 0x5C #define HMAC_PAD_BLOCKLEN SHA1_BLOCK_SIZE #define MD5_DIGEST_SIZE 16 -- cgit v1.2.3 From d477d813363ce6f393c310de8a2269b3eacd18a5 Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Fri, 19 May 2017 08:53:26 +0200 Subject: crypto: marvell - Use IPAD/OPAD constant This patch simply replace all occurrence of HMAC IPAD/OPAD value by their define. Signed-off-by: Corentin Labbe Acked-by: Boris Brezillon Signed-off-by: Herbert Xu --- drivers/crypto/marvell/hash.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c index 77c0fb936f47..e61b08566093 100644 --- a/drivers/crypto/marvell/hash.c +++ b/drivers/crypto/marvell/hash.c @@ -12,6 +12,7 @@ * by the Free Software Foundation. */ +#include #include #include @@ -1164,8 +1165,8 @@ static int mv_cesa_ahmac_pad_init(struct ahash_request *req, memcpy(opad, ipad, blocksize); for (i = 0; i < blocksize; i++) { - ipad[i] ^= 0x36; - opad[i] ^= 0x5c; + ipad[i] ^= HMAC_IPAD_VALUE; + opad[i] ^= HMAC_OPAD_VALUE; } return 0; -- cgit v1.2.3 From 8139782dce737299385d7c9d91a2f62517540f6f Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Fri, 19 May 2017 08:53:27 +0200 Subject: crypto: mv_cesa - Use IPAD/OPAD constant This patch simply replace all occurrence of HMAC IPAD/OPAD value by their define. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/mv_cesa.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c index 451fa18c1c7b..bf25f415eea6 100644 --- a/drivers/crypto/mv_cesa.c +++ b/drivers/crypto/mv_cesa.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -822,8 +823,8 @@ static int mv_hash_setkey(struct crypto_ahash *tfm, const u8 * key, memcpy(opad, ipad, bs); for (i = 0; i < bs; i++) { - ipad[i] ^= 0x36; - opad[i] ^= 0x5c; + ipad[i] ^= HMAC_IPAD_VALUE; + opad[i] ^= HMAC_OPAD_VALUE; } rc = crypto_shash_init(shash) ? : -- cgit v1.2.3 From ebd401e702dd88fb95a92617bf52ad41c3bf61cd Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Fri, 19 May 2017 08:53:28 +0200 Subject: crypto: omap-sham - Use IPAD/OPAD constant This patch simply replace all occurrence of HMAC IPAD/OPAD value by their define. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/omap-sham.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c index d0b16e5e4ee5..1864a57caaa4 100644 --- a/drivers/crypto/omap-sham.c +++ b/drivers/crypto/omap-sham.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #define MD5_DIGEST_SIZE 16 @@ -1326,8 +1327,8 @@ static int omap_sham_setkey(struct crypto_ahash *tfm, const u8 *key, memcpy(bctx->opad, bctx->ipad, bs); for (i = 0; i < bs; i++) { - bctx->ipad[i] ^= 0x36; - bctx->opad[i] ^= 0x5c; + bctx->ipad[i] ^= HMAC_IPAD_VALUE; + bctx->opad[i] ^= HMAC_OPAD_VALUE; } } -- cgit v1.2.3 From f14011ad7cf79a590a39ad2dd7062153ef19ab6e Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Fri, 19 May 2017 08:53:29 +0200 Subject: crypto: qat - Use IPAD/OPAD constant This patch simply replace all occurrence of HMAC IPAD/OPAD value by their define. Signed-off-by: Corentin Labbe Signed-off-by: Herbert Xu --- drivers/crypto/qat/qat_common/qat_algs.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c index 20f35df8a01f..5b5efcc52cb5 100644 --- a/drivers/crypto/qat/qat_common/qat_algs.c +++ b/drivers/crypto/qat/qat_common/qat_algs.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include @@ -178,8 +179,8 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash, for (i = 0; i < block_size; i++) { char *ipad_ptr = ipad + i; char *opad_ptr = opad + i; - *ipad_ptr ^= 0x36; - *opad_ptr ^= 0x5C; + *ipad_ptr ^= HMAC_IPAD_VALUE; + *opad_ptr ^= HMAC_OPAD_VALUE; } if (crypto_shash_init(shash)) -- cgit v1.2.3 From 1127eea914c74bf8c3cb3ddaa3dcaae628e87263 Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Fri, 19 May 2017 08:53:30 +0200 Subject: crypto: mediatek - Use IPAD/OPAD constant This patch simply replace all occurrence of HMAC IPAD/OPAD value by their define. Signed-off-by: Corentin Labbe Reviewed-by: Matthias Brugger Signed-off-by: Herbert Xu --- drivers/crypto/mediatek/mtk-sha.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/mediatek/mtk-sha.c b/drivers/crypto/mediatek/mtk-sha.c index 2226f12d1c7a..5f4f845adbb8 100644 --- a/drivers/crypto/mediatek/mtk-sha.c +++ b/drivers/crypto/mediatek/mtk-sha.c @@ -12,6 +12,7 @@ * Some ideas are from atmel-sha.c and omap-sham.c drivers. */ +#include #include #include "mtk-platform.h" @@ -825,8 +826,8 @@ static int mtk_sha_setkey(struct crypto_ahash *tfm, const u8 *key, memcpy(bctx->opad, bctx->ipad, bs); for (i = 0; i < bs; i++) { - bctx->ipad[i] ^= 0x36; - bctx->opad[i] ^= 0x5c; + bctx->ipad[i] ^= HMAC_IPAD_VALUE; + bctx->opad[i] ^= HMAC_OPAD_VALUE; } return 0; -- cgit v1.2.3 From 6507c57bb013a22ae0f61bdbb2a63380598d34a2 Mon Sep 17 00:00:00 2001 From: Corentin LABBE Date: Fri, 19 May 2017 08:53:31 +0200 Subject: crypto: ccp - Use IPAD/OPAD constant This patch simply replace all occurrence of HMAC IPAD/OPAD value by their define. Signed-off-by: Corentin Labbe Acked-by: Gary R Hook Signed-off-by: Herbert Xu --- drivers/crypto/ccp/ccp-crypto-sha.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c index 6b46eea94932..ce97b3868f4a 100644 --- a/drivers/crypto/ccp/ccp-crypto-sha.c +++ b/drivers/crypto/ccp/ccp-crypto-sha.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -308,8 +309,8 @@ static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key, } for (i = 0; i < block_size; i++) { - ctx->u.sha.ipad[i] = ctx->u.sha.key[i] ^ 0x36; - ctx->u.sha.opad[i] = ctx->u.sha.key[i] ^ 0x5c; + ctx->u.sha.ipad[i] = ctx->u.sha.key[i] ^ HMAC_IPAD_VALUE; + ctx->u.sha.opad[i] = ctx->u.sha.key[i] ^ HMAC_OPAD_VALUE; } sg_init_one(&ctx->u.sha.opad_sg, ctx->u.sha.opad, block_size); -- cgit v1.2.3