author     Stephen Rothwell <sfr@canb.auug.org.au>   2017-05-29 09:45:58 +1000
committer  Stephen Rothwell <sfr@canb.auug.org.au>   2017-05-29 09:45:58 +1000
commit     fc83d9afd1ad021737243eb122602e1b43ddbdf3 (patch)
tree       e7c3567162043d29bdb2d12691b5e76404e9c651
parent     a1f3ad0c7d6aaf236388b03f35c07d0d158f74e4 (diff)
parent     6507c57bb013a22ae0f61bdbb2a63380598d34a2 (diff)
Merge remote-tracking branch 'crypto/master'
28 files changed, 1232 insertions, 292 deletions
diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S
index c98e7e849f06..8550408735a0 100644
--- a/arch/arm64/crypto/sha1-ce-core.S
+++ b/arch/arm64/crypto/sha1-ce-core.S
@@ -82,7 +82,8 @@ ENTRY(sha1_ce_transform)
 	ldr	dgb, [x0, #16]
 	/* load sha1_ce_state::finalize */
-	ldr	w4, [x0, #:lo12:sha1_ce_offsetof_finalize]
+	ldr_l	w4, sha1_ce_offsetof_finalize, x4
+	ldr	w4, [x0, x4]
 	/* load input */
 0:	ld1	{v8.4s-v11.4s}, [x1], #64
@@ -132,7 +133,8 @@ CPU_LE(	rev32	v11.16b, v11.16b	)
 	 * the padding is handled by the C code in that case.
 	 */
 	cbz	x4, 3f
-	ldr	x4, [x0, #:lo12:sha1_ce_offsetof_count]
+	ldr_l	w4, sha1_ce_offsetof_count, x4
+	ldr	x4, [x0, x4]
 	movi	v9.2d, #0
 	mov	x8, #0x80000000
 	movi	v10.2d, #0

diff --git a/arch/arm64/crypto/sha1-ce-glue.c b/arch/arm64/crypto/sha1-ce-glue.c
index aefda9868627..ea319c055f5d 100644
--- a/arch/arm64/crypto/sha1-ce-glue.c
+++ b/arch/arm64/crypto/sha1-ce-glue.c
@@ -17,9 +17,6 @@
 #include <linux/crypto.h>
 #include <linux/module.h>
-#define ASM_EXPORT(sym, val) \
-	asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
-
 MODULE_DESCRIPTION("SHA1 secure hash using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
@@ -32,6 +29,9 @@ struct sha1_ce_state {
 asmlinkage void sha1_ce_transform(struct sha1_ce_state *sst, u8 const *src,
 				  int blocks);
+const u32 sha1_ce_offsetof_count = offsetof(struct sha1_ce_state, sst.count);
+const u32 sha1_ce_offsetof_finalize = offsetof(struct sha1_ce_state, finalize);
+
 static int sha1_ce_update(struct shash_desc *desc, const u8 *data,
 			  unsigned int len)
 {
@@ -52,11 +52,6 @@ static int sha1_ce_finup(struct shash_desc *desc, const u8 *data,
 	struct sha1_ce_state *sctx = shash_desc_ctx(desc);
 	bool finalize = !sctx->sst.count && !(len % SHA1_BLOCK_SIZE);
-	ASM_EXPORT(sha1_ce_offsetof_count,
-		   offsetof(struct sha1_ce_state, sst.count));
-	ASM_EXPORT(sha1_ce_offsetof_finalize,
-		   offsetof(struct sha1_ce_state, finalize));
-
 	/*
 	 * Allow the asm code to perform the finalization if there is no
 	 * partial data and the input is a round multiple of the block size.

diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S
index 01cfee066837..679c6c002f4f 100644
--- a/arch/arm64/crypto/sha2-ce-core.S
+++ b/arch/arm64/crypto/sha2-ce-core.S
@@ -88,7 +88,8 @@ ENTRY(sha2_ce_transform)
 	ld1	{dgav.4s, dgbv.4s}, [x0]
 	/* load sha256_ce_state::finalize */
-	ldr	w4, [x0, #:lo12:sha256_ce_offsetof_finalize]
+	ldr_l	w4, sha256_ce_offsetof_finalize, x4
+	ldr	w4, [x0, x4]
 	/* load input */
 0:	ld1	{v16.4s-v19.4s}, [x1], #64
@@ -136,7 +137,8 @@ CPU_LE(	rev32	v19.16b, v19.16b	)
 	 * the padding is handled by the C code in that case.
 	 */
 	cbz	x4, 3f
-	ldr	x4, [x0, #:lo12:sha256_ce_offsetof_count]
+	ldr_l	w4, sha256_ce_offsetof_count, x4
+	ldr	x4, [x0, x4]
 	movi	v17.2d, #0
 	mov	x8, #0x80000000
 	movi	v18.2d, #0

diff --git a/arch/arm64/crypto/sha2-ce-glue.c b/arch/arm64/crypto/sha2-ce-glue.c
index 7cd587564a41..0ed9486f75dd 100644
--- a/arch/arm64/crypto/sha2-ce-glue.c
+++ b/arch/arm64/crypto/sha2-ce-glue.c
@@ -17,9 +17,6 @@
 #include <linux/crypto.h>
 #include <linux/module.h>
-#define ASM_EXPORT(sym, val) \
-	asm(".globl " #sym "; .set " #sym ", %0" :: "I"(val));
-
 MODULE_DESCRIPTION("SHA-224/SHA-256 secure hash using ARMv8 Crypto Extensions");
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_LICENSE("GPL v2");
@@ -32,6 +29,11 @@ struct sha256_ce_state {
 asmlinkage void sha2_ce_transform(struct sha256_ce_state *sst, u8 const *src,
 				  int blocks);
+const u32 sha256_ce_offsetof_count = offsetof(struct sha256_ce_state,
+					      sst.count);
+const u32 sha256_ce_offsetof_finalize = offsetof(struct sha256_ce_state,
+						 finalize);
+
 static int sha256_ce_update(struct shash_desc *desc, const u8 *data,
 			    unsigned int len)
 {
@@ -52,11 +54,6 @@ static int sha256_ce_finup(struct shash_desc *desc, const u8 *data,
 	struct sha256_ce_state *sctx = shash_desc_ctx(desc);
 	bool finalize = !sctx->sst.count && !(len % SHA256_BLOCK_SIZE);
-	ASM_EXPORT(sha256_ce_offsetof_count,
-		   offsetof(struct sha256_ce_state, sst.count));
-	ASM_EXPORT(sha256_ce_offsetof_finalize,
-		   offsetof(struct sha256_ce_state, finalize));
-
 	/*
 	 * Allow the asm code to perform the finalization if there is no
 	 * partial data and the input is a round multiple of the block size.
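The arm64 glue changes above drop the per-call ASM_EXPORT inline-asm hack in favour of ordinary C constants that the assembly loads at run time. A minimal user-space C sketch of the same offset-export pattern (the struct and names are illustrative, not the kernel's):

/* The C side publishes struct offsets as ordinary globals; a plain C
 * reader stands in for the asm side and indexes the raw bytes.
 */
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct hash_state {
	uint32_t digest[8];
	uint64_t count;     /* bytes processed so far */
	uint32_t finalize;  /* nonzero: asm may pad the final block */
};

/* Exported once, instead of re-emitting an asm .set on every call. */
const uint32_t offsetof_count    = offsetof(struct hash_state, count);
const uint32_t offsetof_finalize = offsetof(struct hash_state, finalize);

int main(void)
{
	struct hash_state st = { .count = 640, .finalize = 1 };
	const uint8_t *base = (const uint8_t *)&st;
	uint64_t count;
	uint32_t finalize;

	/* What "ldr_l w4, ...; ldr x4, [x0, x4]" does, in C terms. */
	memcpy(&count, base + offsetof_count, sizeof(count));
	memcpy(&finalize, base + offsetof_finalize, sizeof(finalize));
	printf("count=%llu finalize=%u\n",
	       (unsigned long long)count, finalize);
	return 0;
}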
diff --git a/arch/x86/crypto/aes-x86_64-asm_64.S b/arch/x86/crypto/aes-x86_64-asm_64.S
index 910565547163..8739cf7795de 100644
--- a/arch/x86/crypto/aes-x86_64-asm_64.S
+++ b/arch/x86/crypto/aes-x86_64-asm_64.S
@@ -42,17 +42,15 @@
 #define R5E %esi
 #define R6 %rdi
 #define R6E %edi
-#define R7 %rbp
-#define R7E %ebp
+#define R7 %r9	/* don't use %rbp; it breaks stack traces */
+#define R7E %r9d
 #define R8 %r8
-#define R9 %r9
 #define R10 %r10
 #define R11 %r11
-#define prologue(FUNC,KEY,B128,B192,r1,r2,r3,r4,r5,r6,r7,r8,r9,r10,r11) \
+#define prologue(FUNC,KEY,B128,B192,r1,r2,r5,r6,r7,r8,r9,r10,r11) \
 	ENTRY(FUNC);			\
 	movq	r1,r2;			\
-	movq	r3,r4;			\
 	leaq	KEY+48(r8),r9;		\
 	movq	r10,r11;		\
 	movl	(r7),r5 ## E;		\
@@ -70,9 +68,8 @@
 	je	B192;			\
 	leaq	32(r9),r9;
-#define epilogue(FUNC,r1,r2,r3,r4,r5,r6,r7,r8,r9) \
+#define epilogue(FUNC,r1,r2,r5,r6,r7,r8,r9) \
 	movq	r1,r2;			\
-	movq	r3,r4;			\
 	movl	r5 ## E,(r9);		\
 	movl	r6 ## E,4(r9);		\
 	movl	r7 ## E,8(r9);		\
@@ -88,12 +85,12 @@
 	movl	TAB(,r6,4),r6 ## E;	\
 	roll	$16,r2 ## E;		\
 	shrl	$16,r4 ## E;		\
-	movzbl	r4 ## H,r7 ## E;	\
-	movzbl	r4 ## L,r4 ## E;	\
+	movzbl	r4 ## L,r7 ## E;	\
+	movzbl	r4 ## H,r4 ## E;	\
 	xorl	OFFSET(r8),ra ## E;	\
 	xorl	OFFSET+4(r8),rb ## E;	\
-	xorl	TAB+3072(,r7,4),r5 ## E;\
-	xorl	TAB+2048(,r4,4),r6 ## E;\
+	xorl	TAB+3072(,r4,4),r5 ## E;\
+	xorl	TAB+2048(,r7,4),r6 ## E;\
 	movzbl	r1 ## L,r7 ## E;	\
 	movzbl	r1 ## H,r4 ## E;	\
 	movl	TAB+1024(,r4,4),r4 ## E;\
@@ -101,19 +98,19 @@
 	roll	$16,r1 ## E;		\
 	shrl	$16,r3 ## E;		\
 	xorl	TAB(,r7,4),r5 ## E;	\
-	movzbl	r3 ## H,r7 ## E;	\
-	movzbl	r3 ## L,r3 ## E;	\
-	xorl	TAB+3072(,r7,4),r4 ## E;\
-	xorl	TAB+2048(,r3,4),r5 ## E;\
-	movzbl	r1 ## H,r7 ## E;	\
-	movzbl	r1 ## L,r3 ## E;	\
+	movzbl	r3 ## L,r7 ## E;	\
+	movzbl	r3 ## H,r3 ## E;	\
+	xorl	TAB+3072(,r3,4),r4 ## E;\
+	xorl	TAB+2048(,r7,4),r5 ## E;\
+	movzbl	r1 ## L,r7 ## E;	\
+	movzbl	r1 ## H,r3 ## E;	\
 	shrl	$16,r1 ## E;		\
-	xorl	TAB+3072(,r7,4),r6 ## E;\
-	movl	TAB+2048(,r3,4),r3 ## E;\
-	movzbl	r1 ## H,r7 ## E;	\
-	movzbl	r1 ## L,r1 ## E;	\
-	xorl	TAB+1024(,r7,4),r6 ## E;\
-	xorl	TAB(,r1,4),r3 ## E;	\
+	xorl	TAB+3072(,r3,4),r6 ## E;\
+	movl	TAB+2048(,r7,4),r3 ## E;\
+	movzbl	r1 ## L,r7 ## E;	\
+	movzbl	r1 ## H,r1 ## E;	\
+	xorl	TAB+1024(,r1,4),r6 ## E;\
+	xorl	TAB(,r7,4),r3 ## E;	\
 	movzbl	r2 ## H,r1 ## E;	\
 	movzbl	r2 ## L,r7 ## E;	\
 	shrl	$16,r2 ## E;		\
@@ -131,9 +128,9 @@
 	movl	r4 ## E,r2 ## E;
 #define entry(FUNC,KEY,B128,B192) \
-	prologue(FUNC,KEY,B128,B192,R2,R8,R7,R9,R1,R3,R4,R6,R10,R5,R11)
+	prologue(FUNC,KEY,B128,B192,R2,R8,R1,R3,R4,R6,R10,R5,R11)
-#define return(FUNC) epilogue(FUNC,R8,R2,R9,R7,R5,R6,R3,R4,R11)
+#define return(FUNC) epilogue(FUNC,R8,R2,R5,R6,R3,R4,R11)
 #define encrypt_round(TAB,OFFSET) \
 	round(TAB,OFFSET,R1,R2,R3,R4,R5,R6,R7,R10,R5,R6,R3,R4) \

diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 3c465184ff8a..16627fec80b2 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -89,6 +89,29 @@ SHIFT_MASK:  .octa 0x0f0e0d0c0b0a09080706050403020100
 ALL_F:       .octa 0xffffffffffffffffffffffffffffffff
              .octa 0x00000000000000000000000000000000
+.section .rodata
+.align 16
+.type aad_shift_arr, @object
+.size aad_shift_arr, 272
+aad_shift_arr:
+	.octa	0xffffffffffffffffffffffffffffffff
+	.octa	0xffffffffffffffffffffffffffffff0C
+	.octa	0xffffffffffffffffffffffffffff0D0C
+	.octa	0xffffffffffffffffffffffffff0E0D0C
+	.octa	0xffffffffffffffffffffffff0F0E0D0C
+	.octa	0xffffffffffffffffffffff0C0B0A0908
+	.octa	0xffffffffffffffffffff0D0C0B0A0908
+	.octa	0xffffffffffffffffff0E0D0C0B0A0908
+	.octa	0xffffffffffffffff0F0E0D0C0B0A0908
+	.octa	0xffffffffffffff0C0B0A090807060504
+	.octa	0xffffffffffff0D0C0B0A090807060504
+	.octa	0xffffffffff0E0D0C0B0A090807060504
+	.octa	0xffffffff0F0E0D0C0B0A090807060504
+	.octa	0xffffff0C0B0A09080706050403020100
+	.octa	0xffff0D0C0B0A09080706050403020100
+	.octa	0xff0E0D0C0B0A09080706050403020100
+	.octa	0x0F0E0D0C0B0A09080706050403020100
+
 .text
@@ -252,32 +275,66 @@ XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
 	mov	arg8, %r12		# %r12 = aadLen
 	mov	%r12, %r11
 	pxor	%xmm\i, %xmm\i
+	pxor	\XMM2, \XMM2
-_get_AAD_loop\num_initial_blocks\operation:
-	movd	(%r10), \TMP1
-	pslldq	$12, \TMP1
-	psrldq	$4, %xmm\i
+	cmp	$16, %r11
+	jl	_get_AAD_rest8\num_initial_blocks\operation
+_get_AAD_blocks\num_initial_blocks\operation:
+	movdqu	(%r10), %xmm\i
+	PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
+	pxor	%xmm\i, \XMM2
+	GHASH_MUL \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	add	$16, %r10
+	sub	$16, %r12
+	sub	$16, %r11
+	cmp	$16, %r11
+	jge	_get_AAD_blocks\num_initial_blocks\operation
+
+	movdqu	\XMM2, %xmm\i
+	cmp	$0, %r11
+	je	_get_AAD_done\num_initial_blocks\operation
+
+	pxor	%xmm\i,%xmm\i
+
+	/* read the last <16B of AAD. since we have at least 4B of
+	data right after the AAD (the ICV, and maybe some CT), we can
+	read 4B/8B blocks safely, and then get rid of the extra stuff */
+_get_AAD_rest8\num_initial_blocks\operation:
+	cmp	$4, %r11
+	jle	_get_AAD_rest4\num_initial_blocks\operation
+	movq	(%r10), \TMP1
+	add	$8, %r10
+	sub	$8, %r11
+	pslldq	$8, \TMP1
+	psrldq	$8, %xmm\i
 	pxor	\TMP1, %xmm\i
+	jmp	_get_AAD_rest8\num_initial_blocks\operation
+_get_AAD_rest4\num_initial_blocks\operation:
+	cmp	$0, %r11
+	jle	_get_AAD_rest0\num_initial_blocks\operation
+	mov	(%r10), %eax
+	movq	%rax, \TMP1
 	add	$4, %r10
-	sub	$4, %r12
-	jne	_get_AAD_loop\num_initial_blocks\operation
-
-	cmp	$16, %r11
-	je	_get_AAD_loop2_done\num_initial_blocks\operation
-
-	mov	$16, %r12
-_get_AAD_loop2\num_initial_blocks\operation:
+	sub	$4, %r10
+	pslldq	$12, \TMP1
 	psrldq	$4, %xmm\i
-	sub	$4, %r12
-	cmp	%r11, %r12
-	jne	_get_AAD_loop2\num_initial_blocks\operation
-
-_get_AAD_loop2_done\num_initial_blocks\operation:
+	pxor	\TMP1, %xmm\i
+_get_AAD_rest0\num_initial_blocks\operation:
+	/* finalize: shift out the extra bytes we read, and align
+	left. since pslldq can only shift by an immediate, we use
+	vpshufb and an array of shuffle masks */
+	movq	%r12, %r11
+	salq	$4, %r11
+	movdqu	aad_shift_arr(%r11), \TMP1
+	PSHUFB_XMM \TMP1, %xmm\i
+_get_AAD_rest_final\num_initial_blocks\operation:
 	PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
+	pxor	\XMM2, %xmm\i
+	GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+_get_AAD_done\num_initial_blocks\operation:
 	xor	%r11, %r11 # initialise the data pointer offset as zero
-
-	# start AES for num_initial_blocks blocks
+	# start AES for num_initial_blocks blocks
 	mov	%arg5, %rax                      # %rax = *Y0
 	movdqu	(%rax), \XMM0                    # XMM0 = Y0
@@ -322,7 +379,7 @@ aes_loop_initial_dec\num_initial_blocks:
 	# prepare plaintext/ciphertext for GHASH computation
 .endr
 .endif
-	GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+
 	# apply GHASH on num_initial_blocks blocks
 .if \i == 5
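The comments in the rewritten AAD loader describe the trick the new aad_shift_arr table enables: over-read the tail in 8/4-byte chunks, then use one shuffle per residual length to left-align the valid bytes and zero the rest. A C sketch emulating PSHUFB's byte-selection semantics (a mask byte with its top bit set yields zero, which is how the 0xff table entries discard the over-read bytes); the sample data is illustrative:

#include <stdint.h>
#include <stdio.h>

/* dst[i] = src[mask[i] & 0x0f], or 0 when mask[i] has bit 7 set. */
static void pshufb_emul(uint8_t dst[16], const uint8_t src[16],
			const uint8_t mask[16])
{
	for (int i = 0; i < 16; i++)
		dst[i] = (mask[i] & 0x80) ? 0 : src[mask[i] & 0x0f];
}

int main(void)
{
	uint8_t src[16] = { 0xAA, 0xBB, 0xCC, 0xDD, 0x11, 0x22, 0x33, 0x44,
			    0x55, 0x66, 0x77, 0x88, 0x99, 0,    0,    0 };
	/* Keep src[4..11] in the low eight bytes, zero everything else;
	 * one such mask per residual AAD length is what the table holds. */
	uint8_t mask[16] = { 4, 5, 6, 7, 8, 9, 10, 11,
			     0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff };
	uint8_t dst[16];

	pshufb_emul(dst, src, mask);
	for (int i = 0; i < 16; i++)
		printf("%02x ", dst[i]);
	printf("\n");
	return 0;
}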
@@ -477,28 +534,66 @@ XMM2 XMM3 XMM4 XMMDst TMP6 TMP7 i i_seq operation
 	mov	arg8, %r12		# %r12 = aadLen
 	mov	%r12, %r11
 	pxor	%xmm\i, %xmm\i
-_get_AAD_loop\num_initial_blocks\operation:
-	movd	(%r10), \TMP1
-	pslldq	$12, \TMP1
-	psrldq	$4, %xmm\i
+	pxor	\XMM2, \XMM2
+
+	cmp	$16, %r11
+	jl	_get_AAD_rest8\num_initial_blocks\operation
+_get_AAD_blocks\num_initial_blocks\operation:
+	movdqu	(%r10), %xmm\i
+	PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
+	pxor	%xmm\i, \XMM2
+	GHASH_MUL \XMM2, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+	add	$16, %r10
+	sub	$16, %r12
+	sub	$16, %r11
+	cmp	$16, %r11
+	jge	_get_AAD_blocks\num_initial_blocks\operation
+
+	movdqu	\XMM2, %xmm\i
+	cmp	$0, %r11
+	je	_get_AAD_done\num_initial_blocks\operation
+
+	pxor	%xmm\i,%xmm\i
+
+	/* read the last <16B of AAD. since we have at least 4B of
+	data right after the AAD (the ICV, and maybe some PT), we can
+	read 4B/8B blocks safely, and then get rid of the extra stuff */
+_get_AAD_rest8\num_initial_blocks\operation:
+	cmp	$4, %r11
+	jle	_get_AAD_rest4\num_initial_blocks\operation
+	movq	(%r10), \TMP1
+	add	$8, %r10
+	sub	$8, %r11
+	pslldq	$8, \TMP1
+	psrldq	$8, %xmm\i
 	pxor	\TMP1, %xmm\i
+	jmp	_get_AAD_rest8\num_initial_blocks\operation
+_get_AAD_rest4\num_initial_blocks\operation:
+	cmp	$0, %r11
+	jle	_get_AAD_rest0\num_initial_blocks\operation
+	mov	(%r10), %eax
+	movq	%rax, \TMP1
 	add	$4, %r10
-	sub	$4, %r12
-	jne	_get_AAD_loop\num_initial_blocks\operation
-	cmp	$16, %r11
-	je	_get_AAD_loop2_done\num_initial_blocks\operation
-	mov	$16, %r12
-_get_AAD_loop2\num_initial_blocks\operation:
+	sub	$4, %r10
+	pslldq	$12, \TMP1
 	psrldq	$4, %xmm\i
-	sub	$4, %r12
-	cmp	%r11, %r12
-	jne	_get_AAD_loop2\num_initial_blocks\operation
-_get_AAD_loop2_done\num_initial_blocks\operation:
+	pxor	\TMP1, %xmm\i
+_get_AAD_rest0\num_initial_blocks\operation:
+	/* finalize: shift out the extra bytes we read, and align
+	left. since pslldq can only shift by an immediate, we use
+	vpshufb and an array of shuffle masks */
+	movq	%r12, %r11
+	salq	$4, %r11
+	movdqu	aad_shift_arr(%r11), \TMP1
+	PSHUFB_XMM \TMP1, %xmm\i
+_get_AAD_rest_final\num_initial_blocks\operation:
 	PSHUFB_XMM %xmm14, %xmm\i # byte-reflect the AAD data
+	pxor	\XMM2, %xmm\i
+	GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+_get_AAD_done\num_initial_blocks\operation:
 	xor	%r11, %r11 # initialise the data pointer offset as zero
-
-	# start AES for num_initial_blocks blocks
+	# start AES for num_initial_blocks blocks
 	mov	%arg5, %rax                      # %rax = *Y0
 	movdqu	(%rax), \XMM0                    # XMM0 = Y0
@@ -543,7 +638,7 @@ aes_loop_initial_enc\num_initial_blocks:
 	# prepare plaintext/ciphertext for GHASH computation
 .endr
 .endif
-	GHASH_MUL %xmm\i, \TMP3, \TMP1, \TMP2, \TMP4, \TMP5, \XMM1
+
 	# apply GHASH on num_initial_blocks blocks
 .if \i == 5
@@ -1454,18 +1549,35 @@ _return_T_decrypt:
 	mov	arg10, %r11		# %r11 = auth_tag_len
 	cmp	$16, %r11
 	je	_T_16_decrypt
-	cmp	$12, %r11
-	je	_T_12_decrypt
+	cmp	$8, %r11
+	jl	_T_4_decrypt
_T_8_decrypt:
 	MOVQ_R64_XMM	%xmm0, %rax
 	mov	%rax, (%r10)
-	jmp	_return_T_done_decrypt
-_T_12_decrypt:
-	MOVQ_R64_XMM	%xmm0, %rax
-	mov	%rax, (%r10)
+	add	$8, %r10
+	sub	$8, %r11
 	psrldq	$8, %xmm0
+	cmp	$0, %r11
+	je	_return_T_done_decrypt
+_T_4_decrypt:
+	movd	%xmm0, %eax
+	mov	%eax, (%r10)
+	add	$4, %r10
+	sub	$4, %r11
+	psrldq	$4, %xmm0
+	cmp	$0, %r11
+	je	_return_T_done_decrypt
+_T_123_decrypt:
 	movd	%xmm0, %eax
-	mov	%eax, 8(%r10)
+	cmp	$2, %r11
+	jl	_T_1_decrypt
+	mov	%ax, (%r10)
+	cmp	$2, %r11
+	je	_return_T_done_decrypt
+	add	$2, %r10
+	sar	$16, %eax
+_T_1_decrypt:
+	mov	%al, (%r10)
 	jmp	_return_T_done_decrypt
_T_16_decrypt:
 	movdqu	%xmm0, (%r10)
@@ -1718,18 +1830,35 @@ _return_T_encrypt:
 	mov	arg10, %r11		# %r11 = auth_tag_len
 	cmp	$16, %r11
 	je	_T_16_encrypt
-	cmp	$12, %r11
-	je	_T_12_encrypt
+	cmp	$8, %r11
+	jl	_T_4_encrypt
_T_8_encrypt:
 	MOVQ_R64_XMM	%xmm0, %rax
 	mov	%rax, (%r10)
-	jmp	_return_T_done_encrypt
-_T_12_encrypt:
-	MOVQ_R64_XMM	%xmm0, %rax
-	mov	%rax, (%r10)
+	add	$8, %r10
+	sub	$8, %r11
 	psrldq	$8, %xmm0
+	cmp	$0, %r11
+	je	_return_T_done_encrypt
+_T_4_encrypt:
+	movd	%xmm0, %eax
+	mov	%eax, (%r10)
+	add	$4, %r10
+	sub	$4, %r11
+	psrldq	$4, %xmm0
+	cmp	$0, %r11
+	je	_return_T_done_encrypt
+_T_123_encrypt:
 	movd	%xmm0, %eax
-	mov	%eax, 8(%r10)
+	cmp	$2, %r11
+	jl	_T_1_encrypt
+	mov	%ax, (%r10)
+	cmp	$2, %r11
+	je	_return_T_done_encrypt
+	add	$2, %r10
+	sar	$16, %eax
+_T_1_encrypt:
+	mov	%al, (%r10)
 	jmp	_return_T_done_encrypt
_T_16_encrypt:
 	movdqu	%xmm0, (%r10)
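The rewritten tag output above replaces the fixed 8/12/16-byte cases with an 8/4/2/1 cascade, so any tag length from 1 to 16 bytes can be written without a byte loop. A hedged C sketch of the same cascade, with memcpy and pointer advances standing in for the register stores and psrldq shifts:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

static void write_tag(uint8_t *dst, const uint8_t tag[16], unsigned int len)
{
	const uint8_t *src = tag;

	if (len == 16) {                /* _T_16: whole-register store */
		memcpy(dst, tag, 16);
		return;
	}
	if (len >= 8) {                 /* _T_8: one 8-byte store */
		memcpy(dst, src, 8);
		dst += 8; src += 8; len -= 8;
	}
	if (len >= 4) {                 /* _T_4: one 4-byte store */
		memcpy(dst, src, 4);
		dst += 4; src += 4; len -= 4;
	}
	if (len >= 2) {                 /* _T_123: one 2-byte store */
		memcpy(dst, src, 2);
		dst += 2; src += 2; len -= 2;
	}
	if (len)                        /* _T_1: final byte */
		*dst = *src;
}

int main(void)
{
	uint8_t tag[16], out[16] = { 0 };

	for (int i = 0; i < 16; i++)
		tag[i] = (uint8_t)(i + 1);
	write_tag(out, tag, 13);        /* e.g. a 13-byte tag */
	for (int i = 0; i < 16; i++)
		printf("%02x ", out[i]);
	printf("\n");
	return 0;
}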
diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S
index d664382c6e56..faecb1518bf8 100644
--- a/arch/x86/crypto/aesni-intel_avx-x86_64.S
+++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S
@@ -155,6 +155,30 @@ SHIFT_MASK:  .octa 0x0f0e0d0c0b0a09080706050403020100
 ALL_F:       .octa 0xffffffffffffffffffffffffffffffff
              .octa 0x00000000000000000000000000000000
+.section .rodata
+.align 16
+.type aad_shift_arr, @object
+.size aad_shift_arr, 272
+aad_shift_arr:
+	.octa	0xffffffffffffffffffffffffffffffff
+	.octa	0xffffffffffffffffffffffffffffff0C
+	.octa	0xffffffffffffffffffffffffffff0D0C
+	.octa	0xffffffffffffffffffffffffff0E0D0C
+	.octa	0xffffffffffffffffffffffff0F0E0D0C
+	.octa	0xffffffffffffffffffffff0C0B0A0908
+	.octa	0xffffffffffffffffffff0D0C0B0A0908
+	.octa	0xffffffffffffffffff0E0D0C0B0A0908
+	.octa	0xffffffffffffffff0F0E0D0C0B0A0908
+	.octa	0xffffffffffffff0C0B0A090807060504
+	.octa	0xffffffffffff0D0C0B0A090807060504
+	.octa	0xffffffffff0E0D0C0B0A090807060504
+	.octa	0xffffffff0F0E0D0C0B0A090807060504
+	.octa	0xffffff0C0B0A09080706050403020100
+	.octa	0xffff0D0C0B0A09080706050403020100
+	.octa	0xff0E0D0C0B0A09080706050403020100
+	.octa	0x0F0E0D0C0B0A09080706050403020100
+
+
 .text
@@ -372,41 +396,72 @@ VARIABLE_OFFSET = 16*8
.macro INITIAL_BLOCKS_AVX num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC
	i = (8-\num_initial_blocks)
+	j = 0
	setreg
-	mov	arg6, %r10		# r10 = AAD
-	mov	arg7, %r12		# r12 = aadLen
-
-
-	mov	%r12, %r11
-
-	vpxor	reg_i, reg_i, reg_i
-_get_AAD_loop\@:
-	vmovd	(%r10), \T1
-	vpslldq	$12, \T1, \T1
-	vpsrldq	$4, reg_i, reg_i
-	vpxor	\T1, reg_i, reg_i
-
-	add	$4, %r10
-	sub	$4, %r12
-	jg	_get_AAD_loop\@
-
-
-	cmp	$16, %r11
-	je	_get_AAD_loop2_done\@
-	mov	$16, %r12
-
-_get_AAD_loop2\@:
-	vpsrldq	$4, reg_i, reg_i
-	sub	$4, %r12
-	cmp	%r11, %r12
-	jg	_get_AAD_loop2\@
-
-_get_AAD_loop2_done\@:
-
-	#byte-reflect the AAD data
-	vpshufb	SHUF_MASK(%rip), reg_i, reg_i
-
+	mov	arg6, %r10		# r10 = AAD
+	mov	arg7, %r12		# r12 = aadLen
+
+
+	mov	%r12, %r11
+
+	vpxor	reg_j, reg_j, reg_j
+	vpxor	reg_i, reg_i, reg_i
+	cmp	$16, %r11
+	jl	_get_AAD_rest8\@
+_get_AAD_blocks\@:
+	vmovdqu	(%r10), reg_i
+	vpshufb	SHUF_MASK(%rip), reg_i, reg_i
+	vpxor	reg_i, reg_j, reg_j
+	GHASH_MUL_AVX	reg_j, \T2, \T1, \T3, \T4, \T5, \T6
+	add	$16, %r10
+	sub	$16, %r12
+	sub	$16, %r11
+	cmp	$16, %r11
+	jge	_get_AAD_blocks\@
+	vmovdqu	reg_j, reg_i
+	cmp	$0, %r11
+	je	_get_AAD_done\@
+
+	vpxor	reg_i, reg_i, reg_i
+
+	/* read the last <16B of AAD. since we have at least 4B of
+	data right after the AAD (the ICV, and maybe some CT), we can
+	read 4B/8B blocks safely, and then get rid of the extra stuff */
+_get_AAD_rest8\@:
+	cmp	$4, %r11
+	jle	_get_AAD_rest4\@
+	movq	(%r10), \T1
+	add	$8, %r10
+	sub	$8, %r11
+	vpslldq	$8, \T1, \T1
+	vpsrldq	$8, reg_i, reg_i
+	vpxor	\T1, reg_i, reg_i
+	jmp	_get_AAD_rest8\@
+_get_AAD_rest4\@:
+	cmp	$0, %r11
+	jle	_get_AAD_rest0\@
+	mov	(%r10), %eax
+	movq	%rax, \T1
+	add	$4, %r10
+	sub	$4, %r11
+	vpslldq	$12, \T1, \T1
+	vpsrldq	$4, reg_i, reg_i
+	vpxor	\T1, reg_i, reg_i
+_get_AAD_rest0\@:
+	/* finalize: shift out the extra bytes we read, and align
+	left. since pslldq can only shift by an immediate, we use
+	vpshufb and an array of shuffle masks */
+	movq	%r12, %r11
+	salq	$4, %r11
+	movdqu	aad_shift_arr(%r11), \T1
+	vpshufb	\T1, reg_i, reg_i
+_get_AAD_rest_final\@:
+	vpshufb	SHUF_MASK(%rip), reg_i, reg_i
+	vpxor	reg_j, reg_i, reg_i
+	GHASH_MUL_AVX	reg_i, \T2, \T1, \T3, \T4, \T5, \T6
+
+_get_AAD_done\@:
	# initialize the data pointer offset as zero
	xor	%r11, %r11

@@ -480,7 +535,6 @@ _get_AAD_loop2_done\@:
	i = (8-\num_initial_blocks)
	j = (9-\num_initial_blocks)
	setreg
-	GHASH_MUL_AVX	reg_i, \T2, \T1, \T3, \T4, \T5, \T6

.rep \num_initial_blocks
	vpxor	reg_i, reg_j, reg_j
@@ -1427,19 +1481,36 @@ _return_T\@:
	cmp	$16, %r11
	je	_T_16\@
-	cmp	$12, %r11
-	je	_T_12\@
+	cmp	$8, %r11
+	jl	_T_4\@
_T_8\@:
	vmovq	%xmm9, %rax
	mov	%rax, (%r10)
-	jmp	_return_T_done\@
-_T_12\@:
-	vmovq	%xmm9, %rax
-	mov	%rax, (%r10)
+	add	$8, %r10
+	sub	$8, %r11
	vpsrldq	$8, %xmm9, %xmm9
+	cmp	$0, %r11
+	je	_return_T_done\@
+_T_4\@:
	vmovd	%xmm9, %eax
-	mov	%eax, 8(%r10)
+	mov	%eax, (%r10)
+	add	$4, %r10
+	sub	$4, %r11
+	vpsrldq	$4, %xmm9, %xmm9
+	cmp	$0, %r11
+	je	_return_T_done\@
+_T_123\@:
+	vmovd	%xmm9, %eax
+	cmp	$2, %r11
+	jl	_T_1\@
+	mov	%ax, (%r10)
+	cmp	$2, %r11
+	je	_return_T_done\@
+	add	$2, %r10
+	sar	$16, %eax
+_T_1\@:
+	mov	%al, (%r10)
	jmp	_return_T_done\@

_T_16\@:
@@ -1631,41 +1702,73 @@ ENDPROC(aesni_gcm_dec_avx_gen2)
.macro INITIAL_BLOCKS_AVX2 num_initial_blocks T1 T2 T3 T4 T5 CTR XMM1 XMM2 XMM3 XMM4 XMM5 XMM6 XMM7 XMM8 T6 T_key ENC_DEC VER
	i = (8-\num_initial_blocks)
+	j = 0
	setreg
-	mov	arg6, %r10		# r10 = AAD
-	mov	arg7, %r12		# r12 = aadLen
-
-
-	mov	%r12, %r11
-
-	vpxor	reg_i, reg_i, reg_i
-_get_AAD_loop\@:
-	vmovd	(%r10), \T1
-	vpslldq	$12, \T1, \T1
-	vpsrldq	$4, reg_i, reg_i
-	vpxor	\T1, reg_i, reg_i
-
-	add	$4, %r10
-	sub	$4, %r12
-	jg	_get_AAD_loop\@
-
-
-	cmp	$16, %r11
-	je	_get_AAD_loop2_done\@
-	mov	$16, %r12
-
-_get_AAD_loop2\@:
-	vpsrldq	$4, reg_i, reg_i
-	sub	$4, %r12
-	cmp	%r11, %r12
-	jg	_get_AAD_loop2\@
-
-_get_AAD_loop2_done\@:
-
-	#byte-reflect the AAD data
-	vpshufb	SHUF_MASK(%rip), reg_i, reg_i
-
+	mov	arg6, %r10		# r10 = AAD
+	mov	arg7, %r12		# r12 = aadLen
+
+
+	mov	%r12, %r11
+
+	vpxor	reg_j, reg_j, reg_j
+	vpxor	reg_i, reg_i, reg_i
+
+	cmp	$16, %r11
+	jl	_get_AAD_rest8\@
+_get_AAD_blocks\@:
+	vmovdqu	(%r10), reg_i
+	vpshufb	SHUF_MASK(%rip), reg_i, reg_i
+	vpxor	reg_i, reg_j, reg_j
+	GHASH_MUL_AVX2	reg_j, \T2, \T1, \T3, \T4, \T5, \T6
+	add	$16, %r10
+	sub	$16, %r12
+	sub	$16, %r11
+	cmp	$16, %r11
+	jge	_get_AAD_blocks\@
+	vmovdqu	reg_j, reg_i
+	cmp	$0, %r11
+	je	_get_AAD_done\@
+
+	vpxor	reg_i, reg_i, reg_i
+
+	/* read the last <16B of AAD. since we have at least 4B of
+	data right after the AAD (the ICV, and maybe some CT), we can
+	read 4B/8B blocks safely, and then get rid of the extra stuff */
+_get_AAD_rest8\@:
+	cmp	$4, %r11
+	jle	_get_AAD_rest4\@
+	movq	(%r10), \T1
+	add	$8, %r10
+	sub	$8, %r11
+	vpslldq	$8, \T1, \T1
+	vpsrldq	$8, reg_i, reg_i
+	vpxor	\T1, reg_i, reg_i
+	jmp	_get_AAD_rest8\@
+_get_AAD_rest4\@:
+	cmp	$0, %r11
+	jle	_get_AAD_rest0\@
+	mov	(%r10), %eax
+	movq	%rax, \T1
+	add	$4, %r10
+	sub	$4, %r11
+	vpslldq	$12, \T1, \T1
+	vpsrldq	$4, reg_i, reg_i
+	vpxor	\T1, reg_i, reg_i
+_get_AAD_rest0\@:
+	/* finalize: shift out the extra bytes we read, and align
+	left. since pslldq can only shift by an immediate, we use
+	vpshufb and an array of shuffle masks */
+	movq	%r12, %r11
+	salq	$4, %r11
+	movdqu	aad_shift_arr(%r11), \T1
+	vpshufb	\T1, reg_i, reg_i
+_get_AAD_rest_final\@:
+	vpshufb	SHUF_MASK(%rip), reg_i, reg_i
+	vpxor	reg_j, reg_i, reg_i
+	GHASH_MUL_AVX2	reg_i, \T2, \T1, \T3, \T4, \T5, \T6
+
+_get_AAD_done\@:
	# initialize the data pointer offset as zero
	xor	%r11, %r11

@@ -1740,7 +1843,6 @@ _get_AAD_loop2_done\@:
	i = (8-\num_initial_blocks)
	j = (9-\num_initial_blocks)
	setreg
-	GHASH_MUL_AVX2	reg_i, \T2, \T1, \T3, \T4, \T5, \T6

.rep \num_initial_blocks
	vpxor	reg_i, reg_j, reg_j
@@ -2702,19 +2804,36 @@ _return_T\@:
	cmp	$16, %r11
	je	_T_16\@
-	cmp	$12, %r11
-	je	_T_12\@
+	cmp	$8, %r11
+	jl	_T_4\@
_T_8\@:
	vmovq	%xmm9, %rax
	mov	%rax, (%r10)
-	jmp	_return_T_done\@
-_T_12\@:
-	vmovq	%xmm9, %rax
-	mov	%rax, (%r10)
+	add	$8, %r10
+	sub	$8, %r11
	vpsrldq	$8, %xmm9, %xmm9
+	cmp	$0, %r11
+	je	_return_T_done\@
+_T_4\@:
	vmovd	%xmm9, %eax
-	mov	%eax, 8(%r10)
+	mov	%eax, (%r10)
+	add	$4, %r10
+	sub	$4, %r11
+	vpsrldq	$4, %xmm9, %xmm9
+	cmp	$0, %r11
+	je	_return_T_done\@
+_T_123\@:
+	vmovd	%xmm9, %eax
+	cmp	$2, %r11
+	jl	_T_1\@
+	mov	%ax, (%r10)
+	cmp	$2, %r11
+	je	_return_T_done\@
+	add	$2, %r10
+	sar	$16, %eax
+_T_1\@:
+	mov	%al, (%r10)
	jmp	_return_T_done\@

_T_16\@:
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 93de8ea51548..4a55cdcdc008 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -61,6 +61,11 @@ struct aesni_rfc4106_gcm_ctx {
 	u8 nonce[4];
 };

+struct generic_gcmaes_ctx {
+	u8 hash_subkey[16] AESNI_ALIGN_ATTR;
+	struct crypto_aes_ctx aes_key_expanded AESNI_ALIGN_ATTR;
+};
+
 struct aesni_xts_ctx {
 	u8 raw_tweak_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
 	u8 raw_crypt_ctx[sizeof(struct crypto_aes_ctx)] AESNI_ALIGN_ATTR;
@@ -102,13 +107,11 @@ asmlinkage void aesni_xts_crypt8(struct crypto_aes_ctx *ctx, u8 *out,
 * u8 *out, Ciphertext output. Encrypt in-place is allowed.
 * const u8 *in, Plaintext input
 * unsigned long plaintext_len, Length of data in bytes for encryption.
- * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association)
- *         concatenated with 8 byte Initialisation Vector (from IPSec ESP
- *         Payload) concatenated with 0x00000001. 16-byte aligned pointer.
+ * u8 *iv, Pre-counter block j0: 12 byte IV concatenated with 0x00000001.
+ *         16-byte aligned pointer.
 * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
 * const u8 *aad, Additional Authentication Data (AAD)
- * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this
- *         is going to be 8 or 12 bytes
+ * unsigned long aad_len, Length of AAD in bytes.
 * u8 *auth_tag, Authenticated Tag output.
 * unsigned long auth_tag_len), Authenticated Tag Length in bytes.
 *         Valid values are 16 (most likely), 12 or 8.
@@ -123,9 +126,8 @@ asmlinkage void aesni_gcm_enc(void *ctx, u8 *out,
 * u8 *out, Plaintext output. Decrypt in-place is allowed.
 * const u8 *in, Ciphertext input
 * unsigned long ciphertext_len, Length of data in bytes for decryption.
- * u8 *iv, Pre-counter block j0: 4 byte salt (from Security Association)
- *         concatenated with 8 byte Initialisation Vector (from IPSec ESP
- *         Payload) concatenated with 0x00000001. 16-byte aligned pointer.
+ * u8 *iv, Pre-counter block j0: 12 byte IV concatenated with 0x00000001.
+ *         16-byte aligned pointer.
 * u8 *hash_subkey, the Hash sub key input. Data starts on a 16-byte boundary.
 * const u8 *aad, Additional Authentication Data (AAD)
 * unsigned long aad_len, Length of AAD in bytes. With RFC4106 this is going
@@ -275,6 +277,16 @@ aesni_rfc4106_gcm_ctx *aesni_rfc4106_gcm_ctx_get(struct crypto_aead *tfm)
 		align = 1;
 	return PTR_ALIGN(crypto_aead_ctx(tfm), align);
 }
+
+static inline struct
+generic_gcmaes_ctx *generic_gcmaes_ctx_get(struct crypto_aead *tfm)
+{
+	unsigned long align = AESNI_ALIGN;
+
+	if (align <= crypto_tfm_ctx_alignment())
+		align = 1;
+	return PTR_ALIGN(crypto_aead_ctx(tfm), align);
+}
 #endif

 static inline struct crypto_aes_ctx *aes_ctx(void *raw_ctx)
@@ -712,32 +724,34 @@ static int rfc4106_set_authsize(struct crypto_aead *parent,
 	return crypto_aead_setauthsize(&cryptd_tfm->base, authsize);
 }

-static int helper_rfc4106_encrypt(struct aead_request *req)
+static int generic_gcmaes_set_authsize(struct crypto_aead *tfm,
+				       unsigned int authsize)
+{
+	switch (authsize) {
+	case 4:
+	case 8:
+	case 12:
+	case 13:
+	case 14:
+	case 15:
+	case 16:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int gcmaes_encrypt(struct aead_request *req, unsigned int assoclen,
+			  u8 *hash_subkey, u8 *iv, void *aes_ctx)
 {
 	u8 one_entry_in_sg = 0;
 	u8 *src, *dst, *assoc;
-	__be32 counter = cpu_to_be32(1);
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
-	void *aes_ctx = &(ctx->aes_key_expanded);
 	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
-	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
 	struct scatter_walk src_sg_walk;
 	struct scatter_walk dst_sg_walk = {};
-	unsigned int i;
-
-	/* Assuming we are supporting rfc4106 64-bit extended */
-	/* sequence numbers We need to have the AAD length equal */
-	/* to 16 or 20 bytes */
-	if (unlikely(req->assoclen != 16 && req->assoclen != 20))
-		return -EINVAL;
-
-	/* IV below built */
-	for (i = 0; i < 4; i++)
-		*(iv+i) = ctx->nonce[i];
-	for (i = 0; i < 8; i++)
-		*(iv+4+i) = req->iv[i];
-	*((__be32 *)(iv+12)) = counter;

 	if (sg_is_last(req->src) &&
 	    (!PageHighMem(sg_page(req->src)) ||
@@ -768,7 +782,7 @@ static int helper_rfc4106_encrypt(struct aead_request *req)

 	kernel_fpu_begin();
 	aesni_gcm_enc_tfm(aes_ctx, dst, src, req->cryptlen, iv,
-			  ctx->hash_subkey, assoc, req->assoclen - 8,
+			  hash_subkey, assoc, assoclen,
 			  dst + req->cryptlen, auth_tag_len);
 	kernel_fpu_end();
@@ -791,37 +805,20 @@
 	return 0;
 }

-static int helper_rfc4106_decrypt(struct aead_request *req)
+static int gcmaes_decrypt(struct aead_request *req, unsigned int assoclen,
+			  u8 *hash_subkey, u8 *iv, void *aes_ctx)
 {
 	u8 one_entry_in_sg = 0;
 	u8 *src, *dst, *assoc;
 	unsigned long tempCipherLen = 0;
-	__be32 counter = cpu_to_be32(1);
-	int retval = 0;
 	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
-	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
-	void *aes_ctx = &(ctx->aes_key_expanded);
 	unsigned long auth_tag_len = crypto_aead_authsize(tfm);
-	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
 	u8 authTag[16];
 	struct scatter_walk src_sg_walk;
 	struct scatter_walk dst_sg_walk = {};
-	unsigned int i;
-
-	if (unlikely(req->assoclen != 16 && req->assoclen != 20))
-		return -EINVAL;
-
-	/* Assuming we are supporting rfc4106 64-bit extended */
-	/* sequence numbers We need to have the AAD length */
-	/* equal to 16 or 20 bytes */
+	int retval = 0;

 	tempCipherLen = (unsigned long)(req->cryptlen - auth_tag_len);
-	/* IV below built */
-	for (i = 0; i < 4; i++)
-		*(iv+i) = ctx->nonce[i];
-	for (i = 0; i < 8; i++)
-		*(iv+4+i) = req->iv[i];
-	*((__be32 *)(iv+12)) = counter;

 	if (sg_is_last(req->src) &&
 	    (!PageHighMem(sg_page(req->src)) ||
@@ -838,7 +835,6 @@ static int helper_rfc4106_decrypt(struct aead_request *req)
 			scatterwalk_start(&dst_sg_walk, req->dst);
 			dst = scatterwalk_map(&dst_sg_walk) + req->assoclen;
 		}
-
 	} else {
 		/* Allocate memory for src, dst, assoc */
 		assoc = kmalloc(req->cryptlen + req->assoclen, GFP_ATOMIC);
@@ -850,9 +846,10 @@
 		dst = src;
 	}

+
 	kernel_fpu_begin();
 	aesni_gcm_dec_tfm(aes_ctx, dst, src, tempCipherLen, iv,
-			  ctx->hash_subkey, assoc, req->assoclen - 8,
+			  hash_subkey, assoc, assoclen,
 			  authTag, auth_tag_len);
 	kernel_fpu_end();
@@ -875,6 +872,60 @@
 		kfree(assoc);
 	}
 	return retval;
+
+}
+
+static int helper_rfc4106_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+	void *aes_ctx = &(ctx->aes_key_expanded);
+	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+	unsigned int i;
+	__be32 counter = cpu_to_be32(1);
+
+	/* Assuming we are supporting rfc4106 64-bit extended */
+	/* sequence numbers We need to have the AAD length equal */
+	/* to 16 or 20 bytes */
+	if (unlikely(req->assoclen != 16 && req->assoclen != 20))
+		return -EINVAL;
+
+	/* IV below built */
+	for (i = 0; i < 4; i++)
+		*(iv+i) = ctx->nonce[i];
+	for (i = 0; i < 8; i++)
+		*(iv+4+i) = req->iv[i];
+	*((__be32 *)(iv+12)) = counter;
+
+	return gcmaes_encrypt(req, req->assoclen - 8, ctx->hash_subkey, iv,
+			      aes_ctx);
+}
+
+static int helper_rfc4106_decrypt(struct aead_request *req)
+{
+	__be32 counter = cpu_to_be32(1);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+	void *aes_ctx = &(ctx->aes_key_expanded);
+	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+	unsigned int i;
+
+	if (unlikely(req->assoclen != 16 && req->assoclen != 20))
+		return -EINVAL;
+
+	/* Assuming we are supporting rfc4106 64-bit extended */
+	/* sequence numbers We need to have the AAD length */
+	/* equal to 16 or 20 bytes */
+
+	/* IV below built */
+	for (i = 0; i < 4; i++)
+		*(iv+i) = ctx->nonce[i];
+	for (i = 0; i < 8; i++)
+		*(iv+4+i) = req->iv[i];
+	*((__be32 *)(iv+12)) = counter;
+
+	return gcmaes_decrypt(req, req->assoclen - 8, ctx->hash_subkey, iv,
+			      aes_ctx);
 }

 static int rfc4106_encrypt(struct aead_request *req)
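With the refactoring above, helper_rfc4106_encrypt/decrypt only build the pre-counter block and delegate to gcmaes_encrypt/gcmaes_decrypt. A small C sketch of the two J0 layouts involved, assuming the standard 96-bit-IV GCM construction J0 = IV || 0x00000001; names are illustrative:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* gcm(aes): the caller supplies the full 12-byte IV. */
static void gcm_build_j0(uint8_t j0[16], const uint8_t iv[12])
{
	memcpy(j0, iv, 12);
	j0[12] = 0; j0[13] = 0; j0[14] = 0; j0[15] = 1;  /* be32(1) */
}

/* rfc4106(gcm(aes)): a 4-byte salt from the key plus an 8-byte IV from
 * the request, so only 8 IV bytes travel with each packet. */
static void rfc4106_build_j0(uint8_t j0[16], const uint8_t salt[4],
			     const uint8_t iv[8])
{
	memcpy(j0, salt, 4);
	memcpy(j0 + 4, iv, 8);
	j0[12] = 0; j0[13] = 0; j0[14] = 0; j0[15] = 1;  /* be32(1) */
}

int main(void)
{
	uint8_t iv12[12] = { 0 }, salt[4] = { 0xde, 0xad, 0xbe, 0xef };
	uint8_t iv8[8] = { 0 }, j0[16];

	gcm_build_j0(j0, iv12);
	rfc4106_build_j0(j0, salt, iv8);
	printf("%02x ... %02x\n", j0[0], j0[15]);
	return 0;
}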
@@ -1035,6 +1086,46 @@ struct {
 };

 #ifdef CONFIG_X86_64
+static int generic_gcmaes_set_key(struct crypto_aead *aead, const u8 *key,
+				  unsigned int key_len)
+{
+	struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(aead);
+
+	return aes_set_key_common(crypto_aead_tfm(aead),
+				  &ctx->aes_key_expanded, key, key_len) ?:
+	       rfc4106_set_hash_subkey(ctx->hash_subkey, key, key_len);
+}
+
+static int generic_gcmaes_encrypt(struct aead_request *req)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct generic_gcmaes_ctx *ctx = generic_gcmaes_ctx_get(tfm);
+	void *aes_ctx = &(ctx->aes_key_expanded);
+	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+	__be32 counter = cpu_to_be32(1);
+
+	memcpy(iv, req->iv, 12);
+	*((__be32 *)(iv+12)) = counter;
+
+	return gcmaes_encrypt(req, req->assoclen, ctx->hash_subkey, iv,
+			      aes_ctx);
+}
+
+static int generic_gcmaes_decrypt(struct aead_request *req)
+{
+	__be32 counter = cpu_to_be32(1);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct aesni_rfc4106_gcm_ctx *ctx = aesni_rfc4106_gcm_ctx_get(tfm);
+	void *aes_ctx = &(ctx->aes_key_expanded);
+	u8 iv[16] __attribute__ ((__aligned__(AESNI_ALIGN)));
+
+	memcpy(iv, req->iv, 12);
+	*((__be32 *)(iv+12)) = counter;
+
+	return gcmaes_decrypt(req, req->assoclen, ctx->hash_subkey, iv,
+			      aes_ctx);
+}
+
 static struct aead_alg aesni_aead_algs[] = { {
 	.setkey			= common_rfc4106_set_key,
 	.setauthsize		= common_rfc4106_set_authsize,
@@ -1069,6 +1160,23 @@ static struct aead_alg aesni_aead_algs[] = { {
 		.cra_ctxsize		= sizeof(struct cryptd_aead *),
 		.cra_module		= THIS_MODULE,
 	},
+}, {
+	.setkey			= generic_gcmaes_set_key,
+	.setauthsize		= generic_gcmaes_set_authsize,
+	.encrypt		= generic_gcmaes_encrypt,
+	.decrypt		= generic_gcmaes_decrypt,
+	.ivsize			= 12,
+	.maxauthsize		= 16,
+	.base = {
+		.cra_name		= "gcm(aes)",
+		.cra_driver_name	= "generic-gcm-aesni",
+		.cra_priority		= 400,
+		.cra_flags		= CRYPTO_ALG_ASYNC,
+		.cra_blocksize		= 1,
+		.cra_ctxsize		= sizeof(struct generic_gcmaes_ctx),
+		.cra_alignmask		= AESNI_ALIGN - 1,
+		.cra_module		= THIS_MODULE,
+	},
 } };
 #else
 static struct aead_alg aesni_aead_algs[0];

diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c
index 2dd3674b5a1e..458409b7568d 100644
--- a/arch/x86/crypto/sha512-mb/sha512_mb.c
+++ b/arch/x86/crypto/sha512-mb/sha512_mb.c
@@ -269,19 +269,19 @@ static struct sha512_hash_ctx
 		 * LAST
 		 */
 		ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
-		return ctx;
+		goto unlock;
 	}

 	if (ctx->status & HASH_CTX_STS_PROCESSING) {
 		/* Cannot submit to a currently processing job. */
 		ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
-		return ctx;
+		goto unlock;
 	}

 	if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) {
 		/* Cannot update a finished job. */
 		ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
-		return ctx;
+		goto unlock;
 	}
@@ -363,6 +363,7 @@
 	}

 	ctx = sha512_ctx_mgr_resubmit(mgr, ctx);
+unlock:
 	spin_unlock_irqrestore(&cstate->work_lock, irqflags);
 	return ctx;
 }

diff --git a/crypto/hmac.c b/crypto/hmac.c
index 72e38c098bb3..92871dc2a63e 100644
--- a/crypto/hmac.c
+++ b/crypto/hmac.c
@@ -16,6 +16,7 @@
 *
 */

+#include <crypto/hmac.h>
 #include <crypto/internal/hash.h>
 #include <crypto/scatterwalk.h>
 #include <linux/err.h>
@@ -74,8 +75,8 @@ static int hmac_setkey(struct crypto_shash *parent,
 	memcpy(opad, ipad, bs);

 	for (i = 0; i < bs; i++) {
-		ipad[i] ^= 0x36;
-		opad[i] ^= 0x5c;
+		ipad[i] ^= HMAC_IPAD_VALUE;
+		opad[i] ^= HMAC_OPAD_VALUE;
 	}

 	return crypto_shash_init(shash) ?:
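The hmac.c change above (and the matching driver changes further down) only name the RFC 2104 constants; the derivation they feed is unchanged. A standalone C sketch of that ipad/opad derivation, where bs is the hash block size (e.g. 64 for SHA-1/SHA-256) and the key is assumed to be at most one block long (longer keys are hashed first):

#include <stdint.h>
#include <string.h>
#include <stdio.h>

#define HMAC_IPAD_VALUE 0x36
#define HMAC_OPAD_VALUE 0x5c

static void hmac_derive_pads(uint8_t *ipad, uint8_t *opad,
			     const uint8_t *key, size_t keylen, size_t bs)
{
	/* Zero-pad the key to a full block, then XOR in the constants. */
	memset(ipad, 0, bs);
	memcpy(ipad, key, keylen);
	memcpy(opad, ipad, bs);

	for (size_t i = 0; i < bs; i++) {
		ipad[i] ^= HMAC_IPAD_VALUE;
		opad[i] ^= HMAC_OPAD_VALUE;
	}
	/* HMAC(K, m) = H(opad || H(ipad || m)) from here on. */
}

int main(void)
{
	uint8_t ipad[64], opad[64];
	const uint8_t key[] = "Jefe";

	hmac_derive_pads(ipad, opad, key, sizeof(key) - 1, 64);
	printf("%02x %02x\n", ipad[0], opad[0]);  /* 'J'^0x36, 'J'^0x5c */
	return 0;
}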
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 9a11f3c2bf98..0dd6a432d6ca 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -138,8 +138,6 @@ static int test_aead_cycles(struct aead_request *req, int enc, int blen)
 	int ret = 0;
 	int i;

-	local_irq_disable();
-
 	/* Warm-up run. */
 	for (i = 0; i < 4; i++) {
 		if (enc)
@@ -169,8 +167,6 @@ static int test_aead_cycles(struct aead_request *req, int enc, int blen)
 	}

out:
-	local_irq_enable();
-
 	if (ret == 0)
 		printk("1 operation in %lu cycles (%d bytes)\n",
 		       (cycles + 4) / 8, blen);

diff --git a/drivers/char/hw_random/omap3-rom-rng.c b/drivers/char/hw_random/omap3-rom-rng.c
index 37a58d78aab3..38b719017186 100644
--- a/drivers/char/hw_random/omap3-rom-rng.c
+++ b/drivers/char/hw_random/omap3-rom-rng.c
@@ -53,7 +53,10 @@ static int omap3_rom_rng_get_random(void *buf, unsigned int count)

 	cancel_delayed_work_sync(&idle_work);
 	if (rng_idle) {
-		clk_prepare_enable(rng_clk);
+		r = clk_prepare_enable(rng_clk);
+		if (r)
+			return r;
+
 		r = omap3_rom_rng_call(0, 0, RNG_GEN_PRNG_HW_INIT);
 		if (r != 0) {
 			clk_disable_unprepare(rng_clk);
@@ -88,6 +91,8 @@ static struct hwrng omap3_rom_rng_ops = {

 static int omap3_rom_rng_probe(struct platform_device *pdev)
 {
+	int ret = 0;
+
 	pr_info("initializing\n");

 	omap3_rom_rng_call = pdev->dev.platform_data;
@@ -104,7 +109,9 @@
 	}

 	/* Leave the RNG in reset state. */
-	clk_prepare_enable(rng_clk);
+	ret = clk_prepare_enable(rng_clk);
+	if (ret)
+		return ret;
 	omap3_rom_rng_idle(0);

 	return hwrng_register(&omap3_rom_rng_ops);

diff --git a/drivers/crypto/bcm/cipher.c b/drivers/crypto/bcm/cipher.c
index cc0d5b98006e..61393dc70b0b 100644
--- a/drivers/crypto/bcm/cipher.c
+++ b/drivers/crypto/bcm/cipher.c
@@ -36,6 +36,7 @@
 #include <crypto/internal/aead.h>
 #include <crypto/aes.h>
 #include <crypto/des.h>
+#include <crypto/hmac.h>
 #include <crypto/sha.h>
 #include <crypto/md5.h>
 #include <crypto/authenc.h>
@@ -2510,8 +2511,8 @@ static int ahash_hmac_setkey(struct crypto_ahash *ahash, const u8 *key,

 	memcpy(ctx->opad, ctx->ipad, blocksize);
 	for (index = 0; index < blocksize; index++) {
-		ctx->ipad[index] ^= 0x36;
-		ctx->opad[index] ^= 0x5c;
+		ctx->ipad[index] ^= HMAC_IPAD_VALUE;
+		ctx->opad[index] ^= HMAC_OPAD_VALUE;
 	}

 	flow_dump("  ipad: ", ctx->ipad, blocksize);
diff --git a/drivers/crypto/caam/caampkc.c b/drivers/crypto/caam/caampkc.c
index 49cbdcba7883..57f399caa977 100644
--- a/drivers/crypto/caam/caampkc.c
+++ b/drivers/crypto/caam/caampkc.c
@@ -18,6 +18,10 @@
 #define DESC_RSA_PUB_LEN	(2 * CAAM_CMD_SZ + sizeof(struct rsa_pub_pdb))
 #define DESC_RSA_PRIV_F1_LEN	(2 * CAAM_CMD_SZ + \
				 sizeof(struct rsa_priv_f1_pdb))
+#define DESC_RSA_PRIV_F2_LEN	(2 * CAAM_CMD_SZ + \
+				 sizeof(struct rsa_priv_f2_pdb))
+#define DESC_RSA_PRIV_F3_LEN	(2 * CAAM_CMD_SZ + \
+				 sizeof(struct rsa_priv_f3_pdb))

 static void rsa_io_unmap(struct device *dev, struct rsa_edesc *edesc,
			 struct akcipher_request *req)
@@ -54,6 +58,42 @@ static void rsa_priv_f1_unmap(struct device *dev, struct rsa_edesc *edesc,
	dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE);
 }

+static void rsa_priv_f2_unmap(struct device *dev, struct rsa_edesc *edesc,
+			      struct akcipher_request *req)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_key *key = &ctx->key;
+	struct rsa_priv_f2_pdb *pdb = &edesc->pdb.priv_f2;
+	size_t p_sz = key->p_sz;
+	size_t q_sz = key->p_sz;
+
+	dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_TO_DEVICE);
+}
+
+static void rsa_priv_f3_unmap(struct device *dev, struct rsa_edesc *edesc,
+			      struct akcipher_request *req)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_key *key = &ctx->key;
+	struct rsa_priv_f3_pdb *pdb = &edesc->pdb.priv_f3;
+	size_t p_sz = key->p_sz;
+	size_t q_sz = key->p_sz;
+
+	dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->dp_dma, p_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->dq_dma, q_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->c_dma, p_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE);
+	dma_unmap_single(dev, pdb->tmp2_dma, q_sz, DMA_TO_DEVICE);
+}
+
 /* RSA Job Completion handler */
 static void rsa_pub_done(struct device *dev, u32 *desc, u32 err, void *context)
 {
@@ -90,6 +130,42 @@ static void rsa_priv_f1_done(struct device *dev, u32 *desc, u32 err,
	akcipher_request_complete(req, err);
 }

+static void rsa_priv_f2_done(struct device *dev, u32 *desc, u32 err,
+			     void *context)
+{
+	struct akcipher_request *req = context;
+	struct rsa_edesc *edesc;
+
+	if (err)
+		caam_jr_strstatus(dev, err);
+
+	edesc = container_of(desc, struct rsa_edesc, hw_desc[0]);
+
+	rsa_priv_f2_unmap(dev, edesc, req);
+	rsa_io_unmap(dev, edesc, req);
+	kfree(edesc);
+
+	akcipher_request_complete(req, err);
+}
+
+static void rsa_priv_f3_done(struct device *dev, u32 *desc, u32 err,
+			     void *context)
+{
+	struct akcipher_request *req = context;
+	struct rsa_edesc *edesc;
+
+	if (err)
+		caam_jr_strstatus(dev, err);
+
+	edesc = container_of(desc, struct rsa_edesc, hw_desc[0]);
+
+	rsa_priv_f3_unmap(dev, edesc, req);
+	rsa_io_unmap(dev, edesc, req);
+	kfree(edesc);
+
+	akcipher_request_complete(req, err);
+}
+
 static struct rsa_edesc *rsa_edesc_alloc(struct akcipher_request *req,
					 size_t desclen)
 {
@@ -258,6 +334,172 @@ static int set_rsa_priv_f1_pdb(struct akcipher_request *req,
	return 0;
 }

+static int set_rsa_priv_f2_pdb(struct akcipher_request *req,
+			       struct rsa_edesc *edesc)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_key *key = &ctx->key;
+	struct device *dev = ctx->dev;
+	struct rsa_priv_f2_pdb *pdb = &edesc->pdb.priv_f2;
+	int sec4_sg_index = 0;
+	size_t p_sz = key->p_sz;
+	size_t q_sz = key->p_sz;
+
+	pdb->d_dma = dma_map_single(dev, key->d, key->d_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->d_dma)) {
+		dev_err(dev, "Unable to map RSA private exponent memory\n");
+		return -ENOMEM;
+	}
+
+	pdb->p_dma = dma_map_single(dev, key->p, p_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->p_dma)) {
+		dev_err(dev, "Unable to map RSA prime factor p memory\n");
+		goto unmap_d;
+	}
+
+	pdb->q_dma = dma_map_single(dev, key->q, q_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->q_dma)) {
+		dev_err(dev, "Unable to map RSA prime factor q memory\n");
+		goto unmap_p;
+	}
+
+	pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->tmp1_dma)) {
+		dev_err(dev, "Unable to map RSA tmp1 memory\n");
+		goto unmap_q;
+	}
+
+	pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->tmp2_dma)) {
+		dev_err(dev, "Unable to map RSA tmp2 memory\n");
+		goto unmap_tmp1;
+	}
+
+	if (edesc->src_nents > 1) {
+		pdb->sgf |= RSA_PRIV_PDB_SGF_G;
+		pdb->g_dma = edesc->sec4_sg_dma;
+		sec4_sg_index += edesc->src_nents;
+	} else {
+		pdb->g_dma = sg_dma_address(req->src);
+	}
+
+	if (edesc->dst_nents > 1) {
+		pdb->sgf |= RSA_PRIV_PDB_SGF_F;
+		pdb->f_dma = edesc->sec4_sg_dma +
+			     sec4_sg_index * sizeof(struct sec4_sg_entry);
+	} else {
+		pdb->f_dma = sg_dma_address(req->dst);
+	}
+
+	pdb->sgf |= (key->d_sz << RSA_PDB_D_SHIFT) | key->n_sz;
+	pdb->p_q_len = (q_sz << RSA_PDB_Q_SHIFT) | p_sz;
+
+	return 0;
+
+unmap_tmp1:
+	dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE);
+unmap_q:
+	dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE);
+unmap_p:
+	dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE);
+unmap_d:
+	dma_unmap_single(dev, pdb->d_dma, key->d_sz, DMA_TO_DEVICE);
+
+	return -ENOMEM;
+}
+
+static int set_rsa_priv_f3_pdb(struct akcipher_request *req,
+			       struct rsa_edesc *edesc)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_key *key = &ctx->key;
+	struct device *dev = ctx->dev;
+	struct rsa_priv_f3_pdb *pdb = &edesc->pdb.priv_f3;
+	int sec4_sg_index = 0;
+	size_t p_sz = key->p_sz;
+	size_t q_sz = key->p_sz;
+
+	pdb->p_dma = dma_map_single(dev, key->p, p_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->p_dma)) {
+		dev_err(dev, "Unable to map RSA prime factor p memory\n");
+		return -ENOMEM;
+	}
+
+	pdb->q_dma = dma_map_single(dev, key->q, q_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->q_dma)) {
+		dev_err(dev, "Unable to map RSA prime factor q memory\n");
+		goto unmap_p;
+	}
+
+	pdb->dp_dma = dma_map_single(dev, key->dp, p_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->dp_dma)) {
+		dev_err(dev, "Unable to map RSA exponent dp memory\n");
+		goto unmap_q;
+	}
+
+	pdb->dq_dma = dma_map_single(dev, key->dq, q_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->dq_dma)) {
+		dev_err(dev, "Unable to map RSA exponent dq memory\n");
+		goto unmap_dp;
+	}
+
+	pdb->c_dma = dma_map_single(dev, key->qinv, p_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->c_dma)) {
+		dev_err(dev, "Unable to map RSA CRT coefficient qinv memory\n");
+		goto unmap_dq;
+	}
+
+	pdb->tmp1_dma = dma_map_single(dev, key->tmp1, p_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->tmp1_dma)) {
+		dev_err(dev, "Unable to map RSA tmp1 memory\n");
+		goto unmap_qinv;
+	}
+
+	pdb->tmp2_dma = dma_map_single(dev, key->tmp2, q_sz, DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, pdb->tmp2_dma)) {
+		dev_err(dev, "Unable to map RSA tmp2 memory\n");
+		goto unmap_tmp1;
+	}
+
+	if (edesc->src_nents > 1) {
+		pdb->sgf |= RSA_PRIV_PDB_SGF_G;
+		pdb->g_dma = edesc->sec4_sg_dma;
+		sec4_sg_index += edesc->src_nents;
+	} else {
+		pdb->g_dma = sg_dma_address(req->src);
+	}
+
+	if (edesc->dst_nents > 1) {
+		pdb->sgf |= RSA_PRIV_PDB_SGF_F;
+		pdb->f_dma = edesc->sec4_sg_dma +
+			     sec4_sg_index * sizeof(struct sec4_sg_entry);
+	} else {
+		pdb->f_dma = sg_dma_address(req->dst);
+	}
+
+	pdb->sgf |= key->n_sz;
+	pdb->p_q_len = (q_sz << RSA_PDB_Q_SHIFT) | p_sz;
+
+	return 0;
+
+unmap_tmp1:
+	dma_unmap_single(dev, pdb->tmp1_dma, p_sz, DMA_TO_DEVICE);
+unmap_qinv:
+	dma_unmap_single(dev, pdb->c_dma, p_sz, DMA_TO_DEVICE);
+unmap_dq:
+	dma_unmap_single(dev, pdb->dq_dma, q_sz, DMA_TO_DEVICE);
+unmap_dp:
+	dma_unmap_single(dev, pdb->dp_dma, p_sz, DMA_TO_DEVICE);
+unmap_q:
+	dma_unmap_single(dev, pdb->q_dma, q_sz, DMA_TO_DEVICE);
+unmap_p:
+	dma_unmap_single(dev, pdb->p_dma, p_sz, DMA_TO_DEVICE);
+
+	return -ENOMEM;
+}
+
 static int caam_rsa_enc(struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
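set_rsa_priv_f2_pdb and set_rsa_priv_f3_pdb above acquire DMA mappings one at a time and release them in reverse order through a chain of goto labels. The same unwind idiom, reduced to hypothetical generic resources (malloc/free stand in for dma_map_single/dma_unmap_single; illustration only):

#include <stdlib.h>

struct ctx { void *a, *b, *c; };

/* Each acquisition gets a label; a failure jumps to the label that
 * releases everything acquired so far, in reverse order. */
static int setup(struct ctx *ctx)
{
	ctx->a = malloc(16);
	if (!ctx->a)
		return -1;

	ctx->b = malloc(16);
	if (!ctx->b)
		goto free_a;

	ctx->c = malloc(16);
	if (!ctx->c)
		goto free_b;

	return 0;            /* all three resources held */

free_b:
	free(ctx->b);
free_a:
	free(ctx->a);
	return -1;           /* caller sees one clean failure */
}

int main(void)
{
	struct ctx ctx;

	if (!setup(&ctx)) {
		free(ctx.c);
		free(ctx.b);
		free(ctx.a);
	}
	return 0;
}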
@@ -301,24 +543,14 @@ init_fail:
	return ret;
 }

-static int caam_rsa_dec(struct akcipher_request *req)
+static int caam_rsa_dec_priv_f1(struct akcipher_request *req)
 {
 	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
 	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
-	struct caam_rsa_key *key = &ctx->key;
 	struct device *jrdev = ctx->dev;
 	struct rsa_edesc *edesc;
 	int ret;

-	if (unlikely(!key->n || !key->d))
-		return -EINVAL;
-
-	if (req->dst_len < key->n_sz) {
-		req->dst_len = key->n_sz;
-		dev_err(jrdev, "Output buffer length less than parameter n\n");
-		return -EOVERFLOW;
-	}
-
 	/* Allocate extended descriptor */
 	edesc = rsa_edesc_alloc(req, DESC_RSA_PRIV_F1_LEN);
 	if (IS_ERR(edesc))
@@ -344,17 +576,147 @@ init_fail:
	return ret;
 }

+static int caam_rsa_dec_priv_f2(struct akcipher_request *req)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct device *jrdev = ctx->dev;
+	struct rsa_edesc *edesc;
+	int ret;
+
+	/* Allocate extended descriptor */
+	edesc = rsa_edesc_alloc(req, DESC_RSA_PRIV_F2_LEN);
+	if (IS_ERR(edesc))
+		return PTR_ERR(edesc);
+
+	/* Set RSA Decrypt Protocol Data Block - Private Key Form #2 */
+	ret = set_rsa_priv_f2_pdb(req, edesc);
+	if (ret)
+		goto init_fail;
+
+	/* Initialize Job Descriptor */
+	init_rsa_priv_f2_desc(edesc->hw_desc, &edesc->pdb.priv_f2);
+
+	ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f2_done, req);
+	if (!ret)
+		return -EINPROGRESS;
+
+	rsa_priv_f2_unmap(jrdev, edesc, req);
+
+init_fail:
+	rsa_io_unmap(jrdev, edesc, req);
+	kfree(edesc);
+	return ret;
+}
+
+static int caam_rsa_dec_priv_f3(struct akcipher_request *req)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct device *jrdev = ctx->dev;
+	struct rsa_edesc *edesc;
+	int ret;
+
+	/* Allocate extended descriptor */
+	edesc = rsa_edesc_alloc(req, DESC_RSA_PRIV_F3_LEN);
+	if (IS_ERR(edesc))
+		return PTR_ERR(edesc);
+
+	/* Set RSA Decrypt Protocol Data Block - Private Key Form #3 */
+	ret = set_rsa_priv_f3_pdb(req, edesc);
+	if (ret)
+		goto init_fail;
+
+	/* Initialize Job Descriptor */
+	init_rsa_priv_f3_desc(edesc->hw_desc, &edesc->pdb.priv_f3);
+
+	ret = caam_jr_enqueue(jrdev, edesc->hw_desc, rsa_priv_f3_done, req);
+	if (!ret)
+		return -EINPROGRESS;
+
+	rsa_priv_f3_unmap(jrdev, edesc, req);
+
+init_fail:
+	rsa_io_unmap(jrdev, edesc, req);
+	kfree(edesc);
+	return ret;
+}
+
+static int caam_rsa_dec(struct akcipher_request *req)
+{
+	struct crypto_akcipher *tfm = crypto_akcipher_reqtfm(req);
+	struct caam_rsa_ctx *ctx = akcipher_tfm_ctx(tfm);
+	struct caam_rsa_key *key = &ctx->key;
+	int ret;
+
+	if (unlikely(!key->n || !key->d))
+		return -EINVAL;
+
+	if (req->dst_len < key->n_sz) {
+		req->dst_len = key->n_sz;
+		dev_err(ctx->dev, "Output buffer length less than parameter n\n");
+		return -EOVERFLOW;
+	}
+
+	if (key->priv_form == FORM3)
+		ret = caam_rsa_dec_priv_f3(req);
+	else if (key->priv_form == FORM2)
+		ret = caam_rsa_dec_priv_f2(req);
+	else
+		ret = caam_rsa_dec_priv_f1(req);
+
+	return ret;
+}
+
 static void caam_rsa_free_key(struct caam_rsa_key *key)
 {
 	kzfree(key->d);
+	kzfree(key->p);
+	kzfree(key->q);
+	kzfree(key->dp);
+	kzfree(key->dq);
+	kzfree(key->qinv);
+	kzfree(key->tmp1);
+	kzfree(key->tmp2);
 	kfree(key->e);
 	kfree(key->n);
-	key->d = NULL;
-	key->e = NULL;
-	key->n = NULL;
-	key->d_sz = 0;
-	key->e_sz = 0;
-	key->n_sz = 0;
+	memset(key, 0, sizeof(*key));
+}
+
+static void caam_rsa_drop_leading_zeros(const u8 **ptr, size_t *nbytes)
+{
+	while (!**ptr && *nbytes) {
+		(*ptr)++;
+		(*nbytes)--;
+	}
 }

+/**
+ * caam_read_rsa_crt - Used for reading dP, dQ, qInv CRT members.
+ * dP, dQ and qInv could decode to less than corresponding p, q length, as the
+ * BER-encoding requires that the minimum number of bytes be used to encode the
+ * integer. dP, dQ, qInv decoded values have to be zero-padded to appropriate
+ * length.
+ *
+ * @ptr   : pointer to {dP, dQ, qInv} CRT member
+ * @nbytes: length in bytes of {dP, dQ, qInv} CRT member
+ * @dstlen: length in bytes of corresponding p or q prime factor
+ */
+static u8 *caam_read_rsa_crt(const u8 *ptr, size_t nbytes, size_t dstlen)
+{
+	u8 *dst;
+
+	caam_rsa_drop_leading_zeros(&ptr, &nbytes);
+	if (!nbytes)
+		return NULL;
+
+	dst = kzalloc(dstlen, GFP_DMA | GFP_KERNEL);
+	if (!dst)
+		return NULL;
+
+	memcpy(dst + (dstlen - nbytes), ptr, nbytes);
+
+	return dst;
+}

 /**
@@ -370,10 +732,9 @@ static inline u8 *caam_read_raw_data(const u8 *buf, size_t *nbytes)
 {
 	u8 *val;

-	while (!*buf && *nbytes) {
-		buf++;
-		(*nbytes)--;
-	}
+	caam_rsa_drop_leading_zeros(&buf, nbytes);
+	if (!*nbytes)
+		return NULL;

 	val = kzalloc(*nbytes, GFP_DMA | GFP_KERNEL);
 	if (!val)
@@ -437,6 +798,64 @@ err:
	return -ENOMEM;
 }

+static void caam_rsa_set_priv_key_form(struct caam_rsa_ctx *ctx,
+				       struct rsa_key *raw_key)
+{
+	struct caam_rsa_key *rsa_key = &ctx->key;
+	size_t p_sz = raw_key->p_sz;
+	size_t q_sz = raw_key->q_sz;
+
+	rsa_key->p = caam_read_raw_data(raw_key->p, &p_sz);
+	if (!rsa_key->p)
+		return;
+	rsa_key->p_sz = p_sz;
+
+	rsa_key->q = caam_read_raw_data(raw_key->q, &q_sz);
+	if (!rsa_key->q)
+		goto free_p;
+	rsa_key->q_sz = q_sz;
+
+	rsa_key->tmp1 = kzalloc(raw_key->p_sz, GFP_DMA | GFP_KERNEL);
+	if (!rsa_key->tmp1)
+		goto free_q;
+
+	rsa_key->tmp2 = kzalloc(raw_key->q_sz, GFP_DMA | GFP_KERNEL);
+	if (!rsa_key->tmp2)
+		goto free_tmp1;
+
+	rsa_key->priv_form = FORM2;
+
+	rsa_key->dp = caam_read_rsa_crt(raw_key->dp, raw_key->dp_sz, p_sz);
+	if (!rsa_key->dp)
+		goto free_tmp2;
+
+	rsa_key->dq = caam_read_rsa_crt(raw_key->dq, raw_key->dq_sz, q_sz);
+	if (!rsa_key->dq)
+		goto free_dp;
+
+	rsa_key->qinv = caam_read_rsa_crt(raw_key->qinv, raw_key->qinv_sz,
+					  q_sz);
+	if (!rsa_key->qinv)
+		goto free_dq;
+
+	rsa_key->priv_form = FORM3;
+
+	return;
+
+free_dq:
+	kzfree(rsa_key->dq);
+free_dp:
+	kzfree(rsa_key->dp);
+free_tmp2:
+	kzfree(rsa_key->tmp2);
+free_tmp1:
+	kzfree(rsa_key->tmp1);
+free_q:
+	kzfree(rsa_key->q);
+free_p:
+	kzfree(rsa_key->p);
+}
+
 static int caam_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key,
				 unsigned int keylen)
 {
@@ -483,6 +902,8 @@ static int caam_rsa_set_priv_key(struct crypto_akcipher *tfm, const void *key,
	memcpy(rsa_key->d, raw_key.d, raw_key.d_sz);
	memcpy(rsa_key->e, raw_key.e, raw_key.e_sz);

+	caam_rsa_set_priv_key_form(ctx, &raw_key);
+
	return 0;

err:
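caam_read_rsa_crt above strips the BER leading zeros from dP, dQ and qInv, then left-pads the value to the length of the corresponding prime. A user-space C sketch of that normalization (the upper bounds check is an added assumption, not in the driver):

#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>

static uint8_t *read_crt_member(const uint8_t *ptr, size_t nbytes,
				size_t dstlen)
{
	uint8_t *dst;

	while (nbytes && !*ptr) {   /* strip BER leading zeros */
		ptr++;
		nbytes--;
	}
	if (!nbytes || nbytes > dstlen)
		return NULL;

	dst = calloc(1, dstlen);    /* zero padding on the left */
	if (!dst)
		return NULL;
	memcpy(dst + (dstlen - nbytes), ptr, nbytes);
	return dst;
}

int main(void)
{
	const uint8_t dq[] = { 0x00, 0x00, 0x1f, 0x2e };
	uint8_t *buf = read_crt_member(dq, sizeof(dq), 8);

	if (buf) {
		for (int i = 0; i < 8; i++)
			printf("%02x ", buf[i]);  /* 00 ... 00 1f 2e */
		printf("\n");
		free(buf);
	}
	return 0;
}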
diff --git a/drivers/crypto/caam/caampkc.h b/drivers/crypto/caam/caampkc.h
index f595d159b112..87ab75e9df43 100644
--- a/drivers/crypto/caam/caampkc.h
+++ b/drivers/crypto/caam/caampkc.h
@@ -13,21 +13,75 @@
 #include "pdb.h"

 /**
+ * caam_priv_key_form - CAAM RSA private key representation
+ * CAAM RSA private key may have either of three forms.
+ *
+ * 1. The first representation consists of the pair (n, d), where the
+ *    components have the following meanings:
+ *        n      the RSA modulus
+ *        d      the RSA private exponent
+ *
+ * 2. The second representation consists of the triplet (p, q, d), where the
+ *    components have the following meanings:
+ *        p      the first prime factor of the RSA modulus n
+ *        q      the second prime factor of the RSA modulus n
+ *        d      the RSA private exponent
+ *
+ * 3. The third representation consists of the quintuple (p, q, dP, dQ, qInv),
+ *    where the components have the following meanings:
+ *        p      the first prime factor of the RSA modulus n
+ *        q      the second prime factor of the RSA modulus n
+ *        dP     the first factor's CRT exponent
+ *        dQ     the second factor's CRT exponent
+ *        qInv   the (first) CRT coefficient
+ *
+ * The benefit of using the third or the second key form is lower computational
+ * cost for the decryption and signature operations.
+ */
+enum caam_priv_key_form {
+	FORM1,
+	FORM2,
+	FORM3
+};
+
+/**
 * caam_rsa_key - CAAM RSA key structure. Keys are allocated in DMA zone.
 * @n           : RSA modulus raw byte stream
 * @e           : RSA public exponent raw byte stream
 * @d           : RSA private exponent raw byte stream
+ * @p           : RSA prime factor p of RSA modulus n
+ * @q           : RSA prime factor q of RSA modulus n
+ * @dp          : RSA CRT exponent of p
+ * @dq          : RSA CRT exponent of q
+ * @qinv        : RSA CRT coefficient
+ * @tmp1        : CAAM uses this temporary buffer as internal state buffer.
+ *                It is assumed to be as long as p.
+ * @tmp2        : CAAM uses this temporary buffer as internal state buffer.
+ *                It is assumed to be as long as q.
 * @n_sz        : length in bytes of RSA modulus n
 * @e_sz        : length in bytes of RSA public exponent
 * @d_sz        : length in bytes of RSA private exponent
+ * @p_sz        : length in bytes of RSA prime factor p of RSA modulus n
+ * @q_sz        : length in bytes of RSA prime factor q of RSA modulus n
+ * @priv_form   : CAAM RSA private key representation
 */
 struct caam_rsa_key {
 	u8 *n;
 	u8 *e;
 	u8 *d;
+	u8 *p;
+	u8 *q;
+	u8 *dp;
+	u8 *dq;
+	u8 *qinv;
+	u8 *tmp1;
+	u8 *tmp2;
 	size_t n_sz;
 	size_t e_sz;
 	size_t d_sz;
+	size_t p_sz;
+	size_t q_sz;
+	enum caam_priv_key_form priv_form;
 };

 /**
@@ -59,6 +113,8 @@ struct rsa_edesc {
	union {
		struct rsa_pub_pdb pub;
		struct rsa_priv_f1_pdb priv_f1;
+		struct rsa_priv_f2_pdb priv_f2;
+		struct rsa_priv_f3_pdb priv_f3;
	} pdb;
	u32 hw_desc[];
 };
@@ -66,5 +122,7 @@ struct rsa_edesc {
 /* Descriptor construction primitives. */
 void init_rsa_pub_desc(u32 *desc, struct rsa_pub_pdb *pdb);
 void init_rsa_priv_f1_desc(u32 *desc, struct rsa_priv_f1_pdb *pdb);
+void init_rsa_priv_f2_desc(u32 *desc, struct rsa_priv_f2_pdb *pdb);
+void init_rsa_priv_f3_desc(u32 *desc, struct rsa_priv_f3_pdb *pdb);

 #endif
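The quintuple form is cheaper because decryption can use the Chinese Remainder Theorem: two exponentiations at roughly half the modulus size (about a 4x speedup over form 1 in practice) followed by Garner's recombination. The standard math, in LaTeX:

\begin{align*}
  m_1 &= c^{d_P} \bmod p, \qquad d_P = d \bmod (p-1) \\
  m_2 &= c^{d_Q} \bmod q, \qquad d_Q = d \bmod (q-1) \\
  h   &= q_{\mathrm{inv}} \cdot (m_1 - m_2) \bmod p, \qquad
         q_{\mathrm{inv}} = q^{-1} \bmod p \\
  m   &= m_2 + h \cdot q
\end{align*}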
diff --git a/drivers/crypto/caam/pdb.h b/drivers/crypto/caam/pdb.h
index aaa00dd1c601..31e59963f4d2 100644
--- a/drivers/crypto/caam/pdb.h
+++ b/drivers/crypto/caam/pdb.h
@@ -483,6 +483,8 @@ struct dsa_verify_pdb {
 #define RSA_PDB_E_MASK          (0xFFF << RSA_PDB_E_SHIFT)
 #define RSA_PDB_D_SHIFT         12
 #define RSA_PDB_D_MASK          (0xFFF << RSA_PDB_D_SHIFT)
+#define RSA_PDB_Q_SHIFT         12
+#define RSA_PDB_Q_MASK          (0xFFF << RSA_PDB_Q_SHIFT)
 
 #define RSA_PDB_SGF_F           (0x8 << RSA_PDB_SGF_SHIFT)
 #define RSA_PDB_SGF_G           (0x4 << RSA_PDB_SGF_SHIFT)
@@ -490,6 +492,8 @@ struct dsa_verify_pdb {
 #define RSA_PRIV_PDB_SGF_G      (0x8 << RSA_PDB_SGF_SHIFT)
 
 #define RSA_PRIV_KEY_FRM_1      0
+#define RSA_PRIV_KEY_FRM_2      1
+#define RSA_PRIV_KEY_FRM_3      2
 
 /**
  * RSA Encrypt Protocol Data Block
@@ -525,4 +529,62 @@ struct rsa_priv_f1_pdb {
 	dma_addr_t d_dma;
 } __packed;
 
+/**
+ * RSA Decrypt PDB - Private Key Form #2
+ * @sgf     : scatter-gather field
+ * @g_dma   : dma address of encrypted input data
+ * @f_dma   : dma address of output data
+ * @d_dma   : dma address of RSA private exponent
+ * @p_dma   : dma address of RSA prime factor p of RSA modulus n
+ * @q_dma   : dma address of RSA prime factor q of RSA modulus n
+ * @tmp1_dma: dma address of temporary buffer. CAAM uses this temporary buffer
+ *            as internal state buffer. It is assumed to be as long as p.
+ * @tmp2_dma: dma address of temporary buffer. CAAM uses this temporary buffer
+ *            as internal state buffer. It is assumed to be as long as q.
+ * @p_q_len : length in bytes of first two prime factors of the RSA modulus n
+ */
+struct rsa_priv_f2_pdb {
+	u32 sgf;
+	dma_addr_t g_dma;
+	dma_addr_t f_dma;
+	dma_addr_t d_dma;
+	dma_addr_t p_dma;
+	dma_addr_t q_dma;
+	dma_addr_t tmp1_dma;
+	dma_addr_t tmp2_dma;
+	u32 p_q_len;
+} __packed;
+
+/**
+ * RSA Decrypt PDB - Private Key Form #3
+ * This is the RSA Chinese Remainder Theorem (CRT) form for two prime factors
+ * of the RSA modulus.
+ * @sgf     : scatter-gather field
+ * @g_dma   : dma address of encrypted input data
+ * @f_dma   : dma address of output data
+ * @c_dma   : dma address of RSA CRT coefficient
+ * @p_dma   : dma address of RSA prime factor p of RSA modulus n
+ * @q_dma   : dma address of RSA prime factor q of RSA modulus n
+ * @dp_dma  : dma address of RSA CRT exponent of RSA prime factor p
+ * @dq_dma  : dma address of RSA CRT exponent of RSA prime factor q
+ * @tmp1_dma: dma address of temporary buffer. CAAM uses this temporary buffer
+ *            as internal state buffer. It is assumed to be as long as p.
+ * @tmp2_dma: dma address of temporary buffer. CAAM uses this temporary buffer
+ *            as internal state buffer. It is assumed to be as long as q.
+ * @p_q_len : length in bytes of first two prime factors of the RSA modulus n
+ */
+struct rsa_priv_f3_pdb {
+	u32 sgf;
+	dma_addr_t g_dma;
+	dma_addr_t f_dma;
+	dma_addr_t c_dma;
+	dma_addr_t p_dma;
+	dma_addr_t q_dma;
+	dma_addr_t dp_dma;
+	dma_addr_t dq_dma;
+	dma_addr_t tmp1_dma;
+	dma_addr_t tmp2_dma;
+	u32 p_q_len;
+} __packed;
+
 #endif
diff --git a/drivers/crypto/caam/pkc_desc.c b/drivers/crypto/caam/pkc_desc.c
index 4e4183e615ea..9e2ce6fe2e43 100644
--- a/drivers/crypto/caam/pkc_desc.c
+++ b/drivers/crypto/caam/pkc_desc.c
@@ -34,3 +34,39 @@ void init_rsa_priv_f1_desc(u32 *desc, struct rsa_priv_f1_pdb *pdb)
 	append_operation(desc, OP_TYPE_UNI_PROTOCOL | OP_PCLID_RSADEC_PRVKEY |
 			 RSA_PRIV_KEY_FRM_1);
 }
+
+/* Descriptor for RSA Private operation - Private Key Form #2 */
+void init_rsa_priv_f2_desc(u32 *desc, struct rsa_priv_f2_pdb *pdb)
+{
+	init_job_desc_pdb(desc, 0, sizeof(*pdb));
+	append_cmd(desc, pdb->sgf);
+	append_ptr(desc, pdb->g_dma);
+	append_ptr(desc, pdb->f_dma);
+	append_ptr(desc, pdb->d_dma);
+	append_ptr(desc, pdb->p_dma);
+	append_ptr(desc, pdb->q_dma);
+	append_ptr(desc, pdb->tmp1_dma);
+	append_ptr(desc, pdb->tmp2_dma);
+	append_cmd(desc, pdb->p_q_len);
+	append_operation(desc, OP_TYPE_UNI_PROTOCOL | OP_PCLID_RSADEC_PRVKEY |
+			 RSA_PRIV_KEY_FRM_2);
+}
+
+/* Descriptor for RSA Private operation - Private Key Form #3 */
+void init_rsa_priv_f3_desc(u32 *desc, struct rsa_priv_f3_pdb *pdb)
+{
+	init_job_desc_pdb(desc, 0, sizeof(*pdb));
+	append_cmd(desc, pdb->sgf);
+	append_ptr(desc, pdb->g_dma);
+	append_ptr(desc, pdb->f_dma);
+	append_ptr(desc, pdb->c_dma);
+	append_ptr(desc, pdb->p_dma);
+	append_ptr(desc, pdb->q_dma);
+	append_ptr(desc, pdb->dp_dma);
+	append_ptr(desc, pdb->dq_dma);
+	append_ptr(desc, pdb->tmp1_dma);
+	append_ptr(desc, pdb->tmp2_dma);
+	append_cmd(desc, pdb->p_q_len);
+	append_operation(desc, OP_TYPE_UNI_PROTOCOL | OP_PCLID_RSADEC_PRVKEY |
+			 RSA_PRIV_KEY_FRM_3);
+}
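Note: RSA_PDB_Q_SHIFT above suggests that the p_q_len word of the form-2/form-3 PDBs packs both prime lengths into a single u32, q's length in bits 12-23 and p's in the low 12 bits, mirroring how RSA_PDB_D_SHIFT packs the private-exponent length. A caller filling the PDB would then presumably use a helper like this (a sketch based on the new macros, not code from the patch):

/* hypothetical helper: pack prime factor lengths for the PDB */
static inline u32 caam_rsa_pack_p_q_len(size_t p_sz, size_t q_sz)
{
	return (q_sz << RSA_PDB_Q_SHIFT) | p_sz;
}

so that pdb->p_q_len = caam_rsa_pack_p_q_len(key->p_sz, key->q_sz) before the descriptor is built.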
diff --git a/drivers/crypto/ccp/ccp-crypto-sha.c b/drivers/crypto/ccp/ccp-crypto-sha.c
index 6b46eea94932..ce97b3868f4a 100644
--- a/drivers/crypto/ccp/ccp-crypto-sha.c
+++ b/drivers/crypto/ccp/ccp-crypto-sha.c
@@ -18,6 +18,7 @@
 #include <linux/crypto.h>
 #include <crypto/algapi.h>
 #include <crypto/hash.h>
+#include <crypto/hmac.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
 #include <crypto/scatterwalk.h>
@@ -308,8 +309,8 @@ static int ccp_sha_setkey(struct crypto_ahash *tfm, const u8 *key,
 	}
 
 	for (i = 0; i < block_size; i++) {
-		ctx->u.sha.ipad[i] = ctx->u.sha.key[i] ^ 0x36;
-		ctx->u.sha.opad[i] = ctx->u.sha.key[i] ^ 0x5c;
+		ctx->u.sha.ipad[i] = ctx->u.sha.key[i] ^ HMAC_IPAD_VALUE;
+		ctx->u.sha.opad[i] = ctx->u.sha.key[i] ^ HMAC_OPAD_VALUE;
 	}
 
 	sg_init_one(&ctx->u.sha.opad_sg, ctx->u.sha.opad, block_size);
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
index 92d1c6959f08..b7504562715c 100644
--- a/drivers/crypto/ccp/ccp-dev.c
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -31,6 +31,7 @@
 #include "ccp-dev.h"
 
 MODULE_AUTHOR("Tom Lendacky <thomas.lendacky@amd.com>");
+MODULE_AUTHOR("Gary R Hook <gary.hook@amd.com>");
 MODULE_LICENSE("GPL");
 MODULE_VERSION("1.0.0");
 MODULE_DESCRIPTION("AMD Cryptographic Coprocessor driver");
diff --git a/drivers/crypto/img-hash.c b/drivers/crypto/img-hash.c
index 9b07f3d88feb..0c6a917a9ab8 100644
--- a/drivers/crypto/img-hash.c
+++ b/drivers/crypto/img-hash.c
@@ -1088,9 +1088,17 @@ static int img_hash_suspend(struct device *dev)
 static int img_hash_resume(struct device *dev)
 {
 	struct img_hash_dev *hdev = dev_get_drvdata(dev);
+	int ret;
 
-	clk_prepare_enable(hdev->hash_clk);
-	clk_prepare_enable(hdev->sys_clk);
+	ret = clk_prepare_enable(hdev->hash_clk);
+	if (ret)
+		return ret;
+
+	ret = clk_prepare_enable(hdev->sys_clk);
+	if (ret) {
+		clk_disable_unprepare(hdev->hash_clk);
+		return ret;
+	}
 
 	return 0;
 }
diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index 771dd26c7076..427cbe012729 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -23,6 +23,7 @@
 #include <crypto/ctr.h>
 #include <crypto/des.h>
 #include <crypto/aes.h>
+#include <crypto/hmac.h>
 #include <crypto/sha.h>
 #include <crypto/algapi.h>
 #include <crypto/internal/aead.h>
@@ -90,8 +91,6 @@
 #define CTL_FLAG_PERFORM_AEAD	0x0008
 #define CTL_FLAG_MASK		0x000f
 
-#define HMAC_IPAD_VALUE   0x36
-#define HMAC_OPAD_VALUE   0x5C
 #define HMAC_PAD_BLOCKLEN SHA1_BLOCK_SIZE
 
 #define MD5_DIGEST_SIZE   16
diff --git a/drivers/crypto/marvell/hash.c b/drivers/crypto/marvell/hash.c
index 77c0fb936f47..e61b08566093 100644
--- a/drivers/crypto/marvell/hash.c
+++ b/drivers/crypto/marvell/hash.c
@@ -12,6 +12,7 @@
  * by the Free Software Foundation.
  */
 
+#include <crypto/hmac.h>
 #include <crypto/md5.h>
 #include <crypto/sha.h>
 
@@ -1164,8 +1165,8 @@ static int mv_cesa_ahmac_pad_init(struct ahash_request *req,
 	memcpy(opad, ipad, blocksize);
 
 	for (i = 0; i < blocksize; i++) {
-		ipad[i] ^= 0x36;
-		opad[i] ^= 0x5c;
+		ipad[i] ^= HMAC_IPAD_VALUE;
+		opad[i] ^= HMAC_OPAD_VALUE;
 	}
 
 	return 0;
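Note: every ipad/opad loop above feeds the same RFC 2104 construction, HMAC(K, m) = H((K ^ opad) || H((K ^ ipad) || m)). As a hedged illustration (not code from any of these drivers; hmac_sketch is a hypothetical name and error handling is elided), the precomputed pads would be consumed roughly like this with the kernel shash API:

#include <crypto/hash.h>

static int hmac_sketch(struct shash_desc *desc, const u8 *ipad, const u8 *opad,
		       unsigned int bs, const u8 *msg, unsigned int len, u8 *out)
{
	unsigned int ds = crypto_shash_digestsize(desc->tfm);

	/* inner hash: H((K ^ ipad) || m) */
	crypto_shash_init(desc);
	crypto_shash_update(desc, ipad, bs);
	crypto_shash_update(desc, msg, len);
	crypto_shash_final(desc, out);

	/* outer hash: H((K ^ opad) || inner digest) */
	crypto_shash_init(desc);
	crypto_shash_update(desc, opad, bs);
	crypto_shash_update(desc, out, ds);
	return crypto_shash_final(desc, out);
}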
diff --git a/drivers/crypto/mediatek/mtk-sha.c b/drivers/crypto/mediatek/mtk-sha.c
index 2226f12d1c7a..5f4f845adbb8 100644
--- a/drivers/crypto/mediatek/mtk-sha.c
+++ b/drivers/crypto/mediatek/mtk-sha.c
@@ -12,6 +12,7 @@
  * Some ideas are from atmel-sha.c and omap-sham.c drivers.
  */
 
+#include <crypto/hmac.h>
 #include <crypto/sha.h>
 #include "mtk-platform.h"
 
@@ -825,8 +826,8 @@ static int mtk_sha_setkey(struct crypto_ahash *tfm, const u8 *key,
 	memcpy(bctx->opad, bctx->ipad, bs);
 
 	for (i = 0; i < bs; i++) {
-		bctx->ipad[i] ^= 0x36;
-		bctx->opad[i] ^= 0x5c;
+		bctx->ipad[i] ^= HMAC_IPAD_VALUE;
+		bctx->opad[i] ^= HMAC_OPAD_VALUE;
 	}
 
 	return 0;
diff --git a/drivers/crypto/mv_cesa.c b/drivers/crypto/mv_cesa.c
index 451fa18c1c7b..bf25f415eea6 100644
--- a/drivers/crypto/mv_cesa.c
+++ b/drivers/crypto/mv_cesa.c
@@ -18,6 +18,7 @@
 #include <linux/slab.h>
 #include <linux/module.h>
 #include <linux/clk.h>
+#include <crypto/hmac.h>
 #include <crypto/internal/hash.h>
 #include <crypto/sha.h>
 #include <linux/of.h>
@@ -822,8 +823,8 @@ static int mv_hash_setkey(struct crypto_ahash *tfm, const u8 * key,
 	memcpy(opad, ipad, bs);
 
 	for (i = 0; i < bs; i++) {
-		ipad[i] ^= 0x36;
-		opad[i] ^= 0x5c;
+		ipad[i] ^= HMAC_IPAD_VALUE;
+		opad[i] ^= HMAC_OPAD_VALUE;
 	}
 
 	rc = crypto_shash_init(shash) ? :
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index d0b16e5e4ee5..1864a57caaa4 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -41,6 +41,7 @@
 #include <crypto/algapi.h>
 #include <crypto/sha.h>
 #include <crypto/hash.h>
+#include <crypto/hmac.h>
 #include <crypto/internal/hash.h>
 
 #define MD5_DIGEST_SIZE		16
@@ -1326,8 +1327,8 @@ static int omap_sham_setkey(struct crypto_ahash *tfm, const u8 *key,
 		memcpy(bctx->opad, bctx->ipad, bs);
 
 		for (i = 0; i < bs; i++) {
-			bctx->ipad[i] ^= 0x36;
-			bctx->opad[i] ^= 0x5c;
+			bctx->ipad[i] ^= HMAC_IPAD_VALUE;
+			bctx->opad[i] ^= HMAC_OPAD_VALUE;
 		}
 	}
diff --git a/drivers/crypto/qat/qat_common/adf_aer.c b/drivers/crypto/qat/qat_common/adf_aer.c
index 2839fccdd84b..d3e25c37dc33 100644
--- a/drivers/crypto/qat/qat_common/adf_aer.c
+++ b/drivers/crypto/qat/qat_common/adf_aer.c
@@ -109,20 +109,7 @@ EXPORT_SYMBOL_GPL(adf_reset_sbr);
 
 void adf_reset_flr(struct adf_accel_dev *accel_dev)
 {
-	struct pci_dev *pdev = accel_to_pci_dev(accel_dev);
-	u16 control = 0;
-	int pos = 0;
-
-	dev_info(&GET_DEV(accel_dev), "Function level reset\n");
-	pos = pci_pcie_cap(pdev);
-	if (!pos) {
-		dev_err(&GET_DEV(accel_dev), "Restart device failed\n");
-		return;
-	}
-	pci_read_config_word(pdev, pos + PCI_EXP_DEVCTL, &control);
-	control |= PCI_EXP_DEVCTL_BCR_FLR;
-	pci_write_config_word(pdev, pos + PCI_EXP_DEVCTL, control);
-	msleep(100);
+	pcie_flr(accel_to_pci_dev(accel_dev));
 }
 EXPORT_SYMBOL_GPL(adf_reset_flr);
 
diff --git a/drivers/crypto/qat/qat_common/qat_algs.c b/drivers/crypto/qat/qat_common/qat_algs.c
index 20f35df8a01f..5b5efcc52cb5 100644
--- a/drivers/crypto/qat/qat_common/qat_algs.c
+++ b/drivers/crypto/qat/qat_common/qat_algs.c
@@ -51,6 +51,7 @@
 #include <crypto/aes.h>
 #include <crypto/sha.h>
 #include <crypto/hash.h>
+#include <crypto/hmac.h>
 #include <crypto/algapi.h>
 #include <crypto/authenc.h>
 #include <linux/dma-mapping.h>
@@ -178,8 +179,8 @@ static int qat_alg_do_precomputes(struct icp_qat_hw_auth_algo_blk *hash,
 	for (i = 0; i < block_size; i++) {
 		char *ipad_ptr = ipad + i;
 		char *opad_ptr = opad + i;
-		*ipad_ptr ^= 0x36;
-		*opad_ptr ^= 0x5C;
+		*ipad_ptr ^= HMAC_IPAD_VALUE;
+		*opad_ptr ^= HMAC_OPAD_VALUE;
 	}
 
 	if (crypto_shash_init(shash))
diff --git a/include/crypto/hmac.h b/include/crypto/hmac.h
new file mode 100644
index 000000000000..ef09f7938204
--- /dev/null
+++ b/include/crypto/hmac.h
@@ -0,0 +1,7 @@
+#ifndef _CRYPTO_HMAC_H
+#define _CRYPTO_HMAC_H
+
+#define HMAC_IPAD_VALUE 0x36
+#define HMAC_OPAD_VALUE 0x5c
+
+#endif /* _CRYPTO_HMAC_H */
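Note: the new <crypto/hmac.h> gives every driver a single definition of the two RFC 2104 pad bytes instead of scattered 0x36/0x5c literals. A quick user-space check (constants copied locally because this snippet builds outside the kernel tree) confirms the well-known relation between them:

#include <assert.h>
#include <stdio.h>

#define HMAC_IPAD_VALUE 0x36
#define HMAC_OPAD_VALUE 0x5c

int main(void)
{
	/* the two pad bytes differ in exactly four bit positions */
	assert((HMAC_IPAD_VALUE ^ HMAC_OPAD_VALUE) == 0x6a);
	printf("ipad=0x%02x opad=0x%02x xor=0x%02x\n",
	       HMAC_IPAD_VALUE, HMAC_OPAD_VALUE,
	       HMAC_IPAD_VALUE ^ HMAC_OPAD_VALUE);
	return 0;
}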