Diffstat (limited to 'debian/patches/old/0002-bsaes-armv7.pl-Big-endian-fixes.patch')
-rw-r--r--  debian/patches/old/0002-bsaes-armv7.pl-Big-endian-fixes.patch | 216
1 file changed, 216 insertions, 0 deletions
diff --git a/debian/patches/old/0002-bsaes-armv7.pl-Big-endian-fixes.patch b/debian/patches/old/0002-bsaes-armv7.pl-Big-endian-fixes.patch
new file mode 100644
index 0000000..23fb94a
--- /dev/null
+++ b/debian/patches/old/0002-bsaes-armv7.pl-Big-endian-fixes.patch
@@ -0,0 +1,216 @@
+From 719e0b800e3737f3a19251a097ff911744ed7a9e Mon Sep 17 00:00:00 2001
+From: Ard Biesheuvel <ard.biesheuvel@linaro.org>
+Date: Mon, 15 Apr 2013 13:54:13 +0200
+Subject: [PATCH 2/3] bsaes-armv7.pl: Big endian fixes
+
+Updated the code to be (more) endian neutral, however, as it is
+still untested on big endian, it is only enabled for little endian
+at the moment.
+---
+ crypto/aes/asm/bsaes-armv7.pl | 121 ++++++++++++++----------------------
+ crypto/evp/e_aes.c            |   3 -
+ 2 files changed, 45 insertions(+), 79 deletions(-)
+
+--- a/crypto/aes/asm/bsaes-armv7.pl
++++ b/crypto/aes/asm/bsaes-armv7.pl
+@@ -1196,8 +1196,9 @@ bsaes_cbc_encrypt:
+
+ .Lcbc_dec_done:
+         vmov.i32 q0, #0
++        vmov.i32 q1, #0
+ .Lcbc_dec_bzero: @ wipe key schedule [if any]
+-        vst1.8 {q0}, [$keysched]!
++        vstm $keysched!, {q0-q1}
+         teq $keysched, r10
+         bne .Lcbc_dec_bzero
+
+@@ -1215,13 +1216,9 @@ my ($inp,$out,$len,$key,$keysched,$const
+
+ $code.=<<___;
+ .align 5
+-        @ byte-swap constants
+-.LSWP:
+-        .quad 0x0405060703020100
+-.LSWPUPM0SR:
+-        .quad 0x0a0d02060c03070b, 0x0004080f05090e01
++
+ .LADD:
+-        .quad 0x0807060504030201
++        .long 1,2,3,4,5,6,7,0
+
+ .extern AES_encrypt
+ .global bsaes_ctr32_encrypt_blocks
+@@ -1233,7 +1230,7 @@ bsaes_ctr32_encrypt_blocks:
+         stmdb sp!, {r4-r8, r10-r11, lr}
+         vstmdb sp!, {d8-d15} @ ABI specification says so
+         ldr $ctr, [sp, #0x60] @ ctr is 1st arg on the stack
+-        sub sp, #0x10 @ scratch space to carry over the ctr
++        sub sp, #0x20 @ scratch space to carry over the ctr
+         mov r10, sp @ save sp
+
+         @ allocate the key schedule on the stack
+@@ -1249,92 +1246,61 @@ bsaes_ctr32_encrypt_blocks:
+         veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key
+         vstmia r12, {@XMM[7]} @ save last round key
+
+-        vldm $ctr, {@XMM[0]} @ load counter
+-        mov $ctr, r10
+-
+-        vldm $keysched, {@XMM[4]} @ load round0 key
+-
+-        vldr `&Dlo(@XMM[8])`, .LSWP @ byte swap upper part
+-        vtbl.8 `&Dhi(@XMM[0])`, {`&Dhi(@XMM[0])`}, `&Dlo(@XMM[8])`
+-        vtbl.8 `&Dhi(@XMM[4])`, {`&Dhi(@XMM[4])`}, `&Dlo(@XMM[8])`
+-
+-        vstm $keysched, {@XMM[4]} @ save adjusted round0 key
+-
++        @ copy the invariant bits of the ctr
++        ldm $ctr, {r4-r5, r11}
++        mov r12, r11
++        stm r10!, {r4-r5}
++        stm r10!, {r4-r5}
++        stm r10!, {r11-r12}
++        stm r10!, {r11-r12}
++        sub r10, #0x20
++
++        ldr r11, [$ctr, #0xc] @ get LSW of BE ctr
++#ifdef __ARMEL__
++        rev r11, r11
++#endif
+         b .Lctr_enc_loop
+
+ .align 5
+ .Lctr_enc_loop:
+
+-        @ set up the addition constants
+-        vldr `&Dlo(@XMM[11])`, .LADD
+-        vmov.i8 `&Dhi(@XMM[11])`, #0
+-        vmov.i8 @XMM[12], #0
+-        vzip.8 `&Dlo(@XMM[11])`, `&Dhi(@XMM[11])`
+-        vzip.16 @XMM[11], @XMM[12]
+-
+         @ get 8 counter values in regs and do the add
+-        vdup.32 @XMM[4], `&Dhi(@XMM[0])`[1]
+-        vdup.32 @XMM[9], `&Dhi(@XMM[0])`[1]
++        adr r4, .LADD
++        vdup.32 @XMM[4], r11
++        vldm r4, {@XMM[11]-@XMM[12]}
++        vmov @XMM[0], @XMM[4]
+         vadd.u32 @XMM[4], @XMM[11]
+-        vadd.u32 @XMM[9], @XMM[12]
+-        vdup.32 @XMM[2], `&Dhi(@XMM[0])`[0]
+-        vdup.32 @XMM[6], `&Dhi(@XMM[0])`[0]
++        vadd.u32 @XMM[0], @XMM[12]
++#ifdef __ARMEL__
++        vrev32.8 @XMM[4], @XMM[4]
++        vrev32.8 @XMM[0], @XMM[0]
++#endif
++        vld1.8 {@XMM[1]-@XMM[2]}, [r10]
++        vld1.8 {@XMM[5]-@XMM[6]}, [r10]
+         vzip.32 @XMM[2], @XMM[4]
+-        vzip.32 @XMM[6], @XMM[9]
+-
+-        vmov `&Dhi(@XMM[1])`, `&Dlo(@XMM[0])`
+-        vmov `&Dlo(@XMM[1])`, `&Dlo(@XMM[0])`
+         vmov @XMM[3], @XMM[1]
+-        vmov @XMM[5], @XMM[1]
+-        vmov @XMM[7], @XMM[1]
++        vzip.32 @XMM[6], @XMM[0]
++        vmov @XMM[7], @XMM[5]
+
+         vswp `&Dhi(@XMM[1])`, `&Dlo(@XMM[2])`
+         vswp `&Dhi(@XMM[3])`, `&Dlo(@XMM[4])`
+         vswp `&Dhi(@XMM[5])`, `&Dlo(@XMM[6])`
+-        vswp `&Dhi(@XMM[7])`, `&Dlo(@XMM[9])`
+-
+-        vstm $ctr, {@XMM[9]} @ save counter
+-
+-        @ Borrow prologue from _bsaes_encrypt8 to use the opportunity
+-        @ to flip byte order in 32-bit counter
++        vswp `&Dhi(@XMM[7])`, `&Dlo(@XMM[0])`
+
+-        adr r11, .LSWPUPM0SR
+-        vld1.8 {@XMM[8]}, [r11]! @ .LSWPUPM0SR
+-        adrl $const,.LSR
+-        vld1.8 {@XMM[9]}, [$keysched] @ load round0 key
++        mov r4, $keysched @ pass round key
+         mov r5, $rounds @ pass rounds
+-        add r4, $keysched, #0x10 @ pass next round key
+-        veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key
+-        veor @XMM[11], @XMM[1], @XMM[9]
+-        vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])`
+-        vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])`
+-        veor @XMM[12], @XMM[2], @XMM[9]
+-        vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])`
+-        vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])`
+-        veor @XMM[13], @XMM[3], @XMM[9]
+-        vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])`
+-        vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])`
+-        veor @XMM[14], @XMM[4], @XMM[9]
+-        vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])`
+-        vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])`
+-        veor @XMM[15], @XMM[5], @XMM[9]
+-        vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])`
+-        vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])`
+-        veor @XMM[10], @XMM[6], @XMM[9]
+-        vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])`
+-        vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])`
+-        veor @XMM[11], @XMM[7], @XMM[9]
+-        vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])`
+-        vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])`
+-        vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])`
+-        vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])`
+
+-        bl _bsaes_encrypt8_bitslice
++        bl _bsaes_encrypt8
+
+         subs $len, #8
++        add r11, #8
++
+         blo .Lctr_enc_loop_done
+
+-        vldmia $inp!, {@XMM[8]-@XMM[15]} @ load input
++        vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ load input
++        vld1.8 {@XMM[10]-@XMM[11]}, [$inp]!
++        vld1.8 {@XMM[12]-@XMM[13]}, [$inp]!
++        vld1.8 {@XMM[14]-@XMM[15]}, [$inp]!
+
+         veor @XMM[8], @XMM[0]
+         veor @XMM[1], @XMM[9]
+@@ -1353,8 +1319,6 @@ bsaes_ctr32_encrypt_blocks:
+         vst1.8 {@XMM[2]}, [$out]!
+         vst1.8 {@XMM[5]}, [$out]!
+
+-        vldm $ctr, {@XMM[0]} @ load counter
+-
+         bne .Lctr_enc_loop
+         b .Lctr_enc_done
+
+@@ -1393,12 +1357,13 @@ bsaes_ctr32_encrypt_blocks:
+
+ .Lctr_enc_done:
+         vmov.i32 q0, #0
++        vmov.i32 q1, #0
+ .Lctr_enc_bzero: @ wipe key schedule [if any]
+-        vst1.8 {q0}, [$keysched]!
++        vstm $keysched!, {q0-q1}
+         teq $keysched, r10
+         bne .Lctr_enc_bzero
+
+-        add sp, r10, #0x10
++        add sp, r10, #0x20
+         vldmia sp!, {d8-d15}
+         ldmia sp!, {r4-r8, r10-r11, pc}
+
+--- a/crypto/evp/e_aes.c
++++ b/crypto/evp/e_aes.c
+@@ -485,7 +485,8 @@ const EVP_CIPHER *EVP_aes_##keylen##_##m
+
+ #endif
+
+-#if defined(AES_ASM) && defined(BSAES_ASM) && (defined(__arm__) || defined(__arm))
++#if defined(AES_ASM) && defined(BSAES_ASM) && (defined(__arm__) || defined(__arm)) \
++    && defined(__ARMEL__)
+ #include "arm_arch.h"
+ #define BSAES_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
+ #endif
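Note on the endian handling in the CTR path above: bsaes_ctr32_encrypt_blocks receives a 16-byte counter block whose low 32 bits are stored big-endian in bytes 12..15 (the "ldr r11, [$ctr, #0xc]" above). The patched code keeps that word in a general-purpose register in host byte order (the "rev r11, r11" guarded by __ARMEL__), increments it with ordinary adds, and byte-swaps the freshly generated counter lanes back with vrev32.8 before encryption; on a big-endian build both swaps compile away. The C fragment below is only a rough sketch of that idea, not code from the patch or from OpenSSL: the function name ctr32_add is made up for the example, and __builtin_bswap32 assumes a GCC/Clang-style compiler.

#include <stdint.h>
#include <string.h>

/* Sketch: add 'inc' to the big-endian 32-bit counter stored in bytes
 * 12..15 of an AES-CTR counter block.  On a little-endian host the word
 * is swapped to host order for the addition and swapped back afterwards,
 * mirroring the rev/vrev32.8 steps guarded by __ARMEL__ in the patch. */
static void ctr32_add(unsigned char block[16], uint32_t inc)
{
    uint32_t ctr;

    memcpy(&ctr, block + 12, sizeof(ctr));   /* stored big-endian */
#if defined(__ARMEL__) || (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
    ctr = __builtin_bswap32(ctr);            /* to host order */
    ctr += inc;
    ctr = __builtin_bswap32(ctr);            /* back to big-endian */
#else
    ctr += inc;                              /* big-endian host: already in host order */
#endif
    memcpy(block + 12, &ctr, sizeof(ctr));
}

In the patched loop the host-order counter in r11 is simply advanced by eight per batch ("add r11, #8" after "subs $len, #8"), which is what makes the new path endian neutral; per the commit message it nonetheless stays enabled only for little endian until it has been tested on big endian.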