diff options
author | Fathi Boudra <fathi.boudra@linaro.org> | 2013-05-26 12:15:05 +0300 |
---|---|---|
committer | Fathi Boudra <fathi.boudra@linaro.org> | 2013-06-29 15:09:45 +0300 |
commit | a7dbcd9ed1d4d9cf3c5e327d4daed85e393303a1 (patch) | |
tree | 84e2fda53d6fd06ff21b3be1b211a44f917155ec | |
parent | bb612d6a59521b30e8dbe7b91cd696e2980cbf6b (diff) |
-rw-r--r-- | debian/changelog | 112 | ||||
-rw-r--r-- | debian/control | 4 | ||||
-rw-r--r-- | debian/libssl1.0.0.postinst | 2 | ||||
-rw-r--r-- | debian/patches/arm64-support | 19 | ||||
-rw-r--r-- | debian/patches/debian-targets.patch | 2 | ||||
-rw-r--r-- | debian/patches/old/0001-Added-CTR-and-CBC-decrypt-hooks-for-NEON-bit-sliced-.patch | 1518 | ||||
-rw-r--r-- | debian/patches/old/0002-bsaes-armv7.pl-Big-endian-fixes.patch | 216 | ||||
-rw-r--r-- | debian/patches/old/0003-bsaes-armv7.pl-avoid-bit-sliced-AES-CBC-for-block-si.patch | 24 | ||||
-rw-r--r-- | debian/patches/pic.patch | 18 | ||||
-rw-r--r-- | debian/patches/series | 2 | ||||
-rw-r--r-- | debian/patches/ubuntu_deb676533_arm_asm.patch | 2 |
11 files changed, 122 insertions, 1797 deletions
diff --git a/debian/changelog b/debian/changelog index 9c76aae..abb81fa 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,6 +1,6 @@ -openssl (1.0.1c-3ubuntu2.5~linaro2) quantal; urgency=low +openssl (1.0.1c-4ubuntu9~linaro1) raring; urgency=low - * Update patches - merged upstream (): + * Update patches - merged upstream: - 0000-bsaes-armv7.patch - 0000-crypto-modes-modes_lcl.h-let-STRICT_ALIGNMENT-be-on-.patch - 0001-bsaes-armv7.pl-take-it-into-build-loop.patch @@ -8,48 +8,40 @@ openssl (1.0.1c-3ubuntu2.5~linaro2) quantal; urgency=low - 0003-bsaes-armv7.pl-avoid-bit-sliced-AES-CBC-for-block-si.patch * Update debian/control: build on armhf architecture only. - -- Fathi Boudra <fathi.boudra@linaro.org> Wed, 24 Apr 2013 12:45:04 +0300 + -- Fathi Boudra <fathi.boudra@linaro.org> Sun, 26 May 2013 12:15:05 +0300 -openssl (1.0.1c-3ubuntu2.5~linaro1) quantal; urgency=low +openssl (1.0.1c-4ubuntu8) raring; urgency=low - * Add patches from Ard Biesheuvel: - - 0001-Added-CTR-and-CBC-decrypt-hooks-for-NEON-bit-sliced-.patch - - 0002-bsaes-armv7.pl-Big-endian-fixes.patch - - 0003-bsaes-armv7.pl-avoid-bit-sliced-AES-CBC-for-block-si.patch + * SECURITY UPDATE: "Lucky Thirteen" timing side-channel TLS attack + - debian/patches/CVE-2013-0169.patch: re-enabled patch and added extra + commit from upstream to fix regression. + - CVE-2013-0169 - -- Fathi Boudra <fathi.boudra@linaro.org> Tue, 16 Apr 2013 12:43:11 +0300 + -- Marc Deslauriers <marc.deslauriers@ubuntu.com> Tue, 19 Mar 2013 14:33:14 -0400 -openssl (1.0.1c-3ubuntu2.4) quantal; urgency=low +openssl (1.0.1c-4ubuntu7) raring; urgency=low - [ Dmitrijs Ledkovs ] - * Enable arm assembly code. (LP: #1083498) (Closes: #676533) * Enable optimized 64bit elliptic curve code contributed by Google. (LP: #1018522) - [ Marc Deslauriers ] - * debian/patches/fix_key_decoding_deadlock.patch: Fix possible deadlock - when decoding public keys. (LP: #1066032) - - -- Dmitrijs Ledkovs <dmitrij.ledkov@ubuntu.com> Thu, 04 Apr 2013 12:15:11 +0100 + -- Dmitrijs Ledkovs <dmitrij.ledkov@ubuntu.com> Thu, 07 Mar 2013 15:36:16 +0000 -openssl (1.0.1c-3ubuntu2.3) quantal-security; urgency=low +openssl (1.0.1c-4ubuntu6) raring; urgency=low - * SECURITY UPDATE: "Lucky Thirteen" timing side-channel TLS attack - - debian/patches/CVE-2013-0169.patch: re-enabled patch and added extra - commit from upstream to fix regression. - - CVE-2013-0169 + * debian/patches/fix_key_decoding_deadlock.patch: Fix possible deadlock + when decoding public keys. (LP: #1066032) - -- Marc Deslauriers <marc.deslauriers@ubuntu.com> Tue, 19 Mar 2013 14:35:24 -0400 + -- Marc Deslauriers <marc.deslauriers@ubuntu.com> Wed, 06 Mar 2013 08:11:19 -0500 -openssl (1.0.1c-3ubuntu2.2) quantal-security; urgency=low +openssl (1.0.1c-4ubuntu5) raring; urgency=low * REGRESSION FIX: decryption errors on AES-NI hardware (LP: #1134873, LP: #1133333) - debian/patches/CVE-2013-0169.patch: disabled for now until fix is available from upstream. - -- Marc Deslauriers <marc.deslauriers@ubuntu.com> Thu, 28 Feb 2013 10:56:42 -0500 + -- Marc Deslauriers <marc.deslauriers@ubuntu.com> Thu, 28 Feb 2013 11:01:29 -0500 -openssl (1.0.1c-3ubuntu2.1) quantal-security; urgency=low +openssl (1.0.1c-4ubuntu4) raring; urgency=low * SECURITY UPDATE: denial of service via invalid OCSP key - debian/patches/CVE-2013-0166.patch: properly handle NULL key in @@ -62,7 +54,73 @@ openssl (1.0.1c-3ubuntu2.1) quantal-security; urgency=low - Fix included in CVE-2013-0169 patch - CVE-2012-2686 - -- Marc Deslauriers <marc.deslauriers@ubuntu.com> Mon, 18 Feb 2013 13:13:42 -0500 + -- Marc Deslauriers <marc.deslauriers@ubuntu.com> Tue, 19 Feb 2013 13:25:24 -0500 + +openssl (1.0.1c-4ubuntu3) raring; urgency=low + + * Add basic arm64 support (no assembler) (LP: #1102107) + + -- Wookey <wookey@wookware.org> Sun, 20 Jan 2013 17:30:15 +0000 + +openssl (1.0.1c-4ubuntu2) raring; urgency=low + + * Enable arm assembly code. (LP: #1083498) (Closes: #676533) + + -- Dmitrijs Ledkovs <dmitrij.ledkov@ubuntu.com> Wed, 28 Nov 2012 00:08:45 +0000 + +openssl (1.0.1c-4ubuntu1) raring; urgency=low + + * Resynchronise with Debian (LP: #1077228). Remaining changes: + - debian/libssl1.0.0.postinst: + + Display a system restart required notification on libssl1.0.0 + upgrade on servers. + + Use a different priority for libssl1.0.0/restart-services depending + on whether a desktop, or server dist-upgrade is being performed. + - debian/{libssl1.0.0-udeb.dirs, control, rules}: Create + libssl1.0.0-udeb, for the benefit of wget-udeb (no wget-udeb package + in Debian). + - debian/{libcrypto1.0.0-udeb.dirs, libssl1.0.0.dirs, libssl1.0.0.files, + rules}: Move runtime libraries to /lib, for the benefit of + wpasupplicant. + - debian/patches/perlpath-quilt.patch: Don't change perl #! paths under + .pc. + - debian/rules: + + Don't run 'make test' when cross-building. + + Use host compiler when cross-building. Patch from Neil Williams. + + Don't build for processors no longer supported: i586 (on i386) + + Fix Makefile to properly clean up libs/ dirs in clean target. + + Replace duplicate files in the doc directory with symlinks. + - Unapply patch c_rehash-multi and comment it out in the series as it + breaks parsing of certificates with CRLF line endings and other cases + (see Debian #642314 for discussion), it also changes the semantics of + c_rehash directories by requiring applications to parse hash link + targets as files containing potentially *multiple* certificates rather + than exactly one. + - Bump version passed to dh_makeshlibs to 1.0.1 for new symbols. + - debian/patches/tls12_workarounds.patch: Workaround large client hello + issues when TLS 1.1 and lower is in use + - debian/control: Mark Debian Vcs-* as XS-Debian-Vcs-* + * Dropped changes: + - Drop openssl-doc in favour of the libssl-doc package introduced by + Debian. Add Conflicts/Replaces until the next LTS release. + + Drop the Conflicts/Replaces because 12.04 LTS was 'the next LTS + release' + + -- Tyler Hicks <tyhicks@canonical.com> Fri, 09 Nov 2012 14:49:13 -0800 + +openssl (1.0.1c-4) unstable; urgency=low + + * Fix the configure rules for alpha (Closes: #672710) + * Switch the postinst to sh again, there never was a reason to + switch it to bash (Closes: #676398) + * Fix pic.patch to not use #ifdef in x86cpuid.s, only .S files are + preprocessed. We generate the file again for pic anyway. + (Closes: #677468) + * Drop Breaks against openssh as it was only for upgrades + between versions that were only in testing/unstable. + (Closes: #668600) + + -- Kurt Roeckx <kurt@roeckx.be> Tue, 17 Jul 2012 11:49:19 +0200 openssl (1.0.1c-3ubuntu2) quantal; urgency=low diff --git a/debian/control b/debian/control index 8868169..b2c0212 100644 --- a/debian/control +++ b/debian/control @@ -34,7 +34,6 @@ Architecture: armhf Multi-Arch: same Pre-Depends: ${misc:Pre-Depends} Depends: ${shlibs:Depends}, ${misc:Depends} -Breaks: openssh-client (<< 1:5.9p1-4), openssh-server (<< 1:5.9p1-4) Description: SSL shared libraries libssl and libcrypto shared libraries needed by programs like apache-ssl, telnet-ssl and openssh. @@ -79,8 +78,7 @@ Package: libssl-doc Section: doc Priority: optional Architecture: all -Replaces: libssl-dev (<< 1.0.0), openssl-doc -Conflicts: openssl-doc +Replaces: libssl-dev (<< 1.0.0) Breaks: libssl-dev (<< 1.0.0) Depends: ${shlibs:Depends}, ${perl:Depends}, ${misc:Depends} Description: SSL development documentation documentation diff --git a/debian/libssl1.0.0.postinst b/debian/libssl1.0.0.postinst index 4e8a17c..57ae577 100644 --- a/debian/libssl1.0.0.postinst +++ b/debian/libssl1.0.0.postinst @@ -1,4 +1,4 @@ -#!/bin/bash +#!/bin/sh . /usr/share/debconf/confmodule diff --git a/debian/patches/arm64-support b/debian/patches/arm64-support new file mode 100644 index 0000000..a2f6cd6 --- /dev/null +++ b/debian/patches/arm64-support @@ -0,0 +1,19 @@ +Description: Add arm64 support + Add 'debian-arm64' to configure so it at least tries to build +Author: Wookey <wookey@wookware.org>, Riku Voipio <riku.voipio@linaro.org> +Last-Update: <2013-01-20> + +--- + Configure | 1 + + 1 file changed, 1 insertion(+) + +--- a/Configure ++++ b/Configure +@@ -346,6 +346,7 @@ my %table=( + "debian-alpha","gcc:-DTERMIO ${debian_cflags}::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", + "debian-alpha-ev4","gcc:-DTERMIO ${debian_cflags} -mcpu=ev4::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", + "debian-alpha-ev5","gcc:-DTERMIO ${debian_cflags} -mcpu=ev5::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", ++"debian-arm64","gcc:-DL_ENDIAN -DTERMIO ${debian_cflags}::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", + "debian-armel","gcc:-DL_ENDIAN -DTERMIO ${debian_cflags}::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", + "debian-armhf","gcc:-DL_ENDIAN -DTERMIO ${debian_cflags}::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", + "debian-amd64", "gcc:-m64 -DL_ENDIAN -DTERMIO ${debian_cflags} -DMD32_REG_T=int::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_INT DES_UNROLL:${x86_64_asm}:elf:dlfcn:linux-shared:-fPIC:-m64:.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR):::", diff --git a/debian/patches/debian-targets.patch b/debian/patches/debian-targets.patch index 0727967..1a4da29 100644 --- a/debian/patches/debian-targets.patch +++ b/debian/patches/debian-targets.patch @@ -20,7 +20,7 @@ "tru64-alpha-cc", "cc:-std1 -tune host -fast -readonly_strings::-pthread:::SIXTY_FOUR_BIT_LONG RC4_CHUNK:${alpha_asm}:dlfcn:alpha-osf1-shared::-msym:.so", +# Debian GNU/* (various architectures) -+"debian-alpha","gcc:-DTERMIO $debian_cflag::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", ++"debian-alpha","gcc:-DTERMIO ${debian_cflags}::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"debian-alpha-ev4","gcc:-DTERMIO ${debian_cflags} -mcpu=ev4::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"debian-alpha-ev5","gcc:-DTERMIO ${debian_cflags} -mcpu=ev5::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", +"debian-armeb","gcc:-DB_ENDIAN -DTERMIO ${debian_cflags}::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", diff --git a/debian/patches/old/0001-Added-CTR-and-CBC-decrypt-hooks-for-NEON-bit-sliced-.patch b/debian/patches/old/0001-Added-CTR-and-CBC-decrypt-hooks-for-NEON-bit-sliced-.patch deleted file mode 100644 index 0ebaba8..0000000 --- a/debian/patches/old/0001-Added-CTR-and-CBC-decrypt-hooks-for-NEON-bit-sliced-.patch +++ /dev/null @@ -1,1518 +0,0 @@ -From 5e51c6b42b8b7d773ab45dcabec1189a1451bebd Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ard.biesheuvel@linaro.org> -Date: Thu, 11 Apr 2013 17:33:36 +0200 -Subject: [PATCH 1/3] Added CTR and CBC-decrypt hooks for NEON bit sliced AES - -The actual bit sliced AES transform was already there, only the -hooks were missing. These are based heavily on the existing hooks -for x86_64. ---- - Configure | 2 - crypto/aes/Makefile | 3 - crypto/aes/asm/bsaes-armv7.pl | 1447 ++++++++++++++++++++++++++++++++++++++++++ - crypto/evp/e_aes.c | 8 - 4 files changed, 1458 insertions(+), 2 deletions(-) - ---- a/Configure -+++ b/Configure -@@ -140,7 +140,7 @@ my $alpha_asm="alphacpuid.o:bn_asm.o alp - my $mips32_asm=":bn-mips.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o::::::::"; - my $mips64_asm=":bn-mips.o mips-mont.o::aes_cbc.o aes-mips.o:::sha1-mips.o sha256-mips.o sha512-mips.o::::::::"; - my $s390x_asm="s390xcap.o s390xcpuid.o:bn-s390x.o s390x-mont.o s390x-gf2m.o::aes-s390x.o aes-ctr.o aes-xts.o:::sha1-s390x.o sha256-s390x.o sha512-s390x.o::rc4-s390x.o:::::ghash-s390x.o:"; --my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::void"; -+my $armv4_asm="armcap.o armv4cpuid.o:bn_asm.o armv4-mont.o armv4-gf2m.o::aes_cbc.o aes-armv4.o bsaes-armv7.o:::sha1-armv4-large.o sha256-armv4.o sha512-armv4.o:::::::ghash-armv4.o::void"; - my $parisc11_asm="pariscid.o:bn_asm.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::32"; - my $parisc20_asm="pariscid.o:pa-risc2W.o parisc-mont.o::aes_core.o aes_cbc.o aes-parisc.o:::sha1-parisc.o sha256-parisc.o sha512-parisc.o::rc4-parisc.o:::::ghash-parisc.o::64"; - my $ppc32_asm="ppccpuid.o ppccap.o:bn-ppc.o ppc-mont.o ppc64-mont.o::aes_core.o aes_cbc.o aes-ppc.o:::sha1-ppc.o sha256-ppc.o::::::::"; ---- a/crypto/aes/Makefile -+++ b/crypto/aes/Makefile -@@ -79,8 +79,9 @@ aes-mips.S: asm/aes-mips.pl - $(PERL) asm/aes-mips.pl $(PERLASM_SCHEME) $@ - - # GNU make "catch all" --aes-%.S: asm/aes-%.pl; $(PERL) $< $(PERLASM_SCHEME) > $@ -+%.S: asm/%.pl; $(PERL) $< $(PERLASM_SCHEME) > $@ - aes-armv4.o: aes-armv4.S -+bsaes-armv7.o: bsaes-armv7.S - - files: - $(PERL) $(TOP)/util/files.pl Makefile >> $(TOP)/MINFO ---- /dev/null -+++ b/crypto/aes/asm/bsaes-armv7.pl -@@ -0,0 +1,1447 @@ -+#!/usr/bin/env perl -+ -+# ==================================================================== -+# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL -+# project. The module is, however, dual licensed under OpenSSL and -+# CRYPTOGAMS licenses depending on where you obtain it. For further -+# details see http://www.openssl.org/~appro/cryptogams/. -+# ==================================================================== -+ -+# Bit-sliced AES for ARM NEON -+# -+# February 2012. -+# -+# This implementation is direct adaptation of bsaes-x86_64 module for -+# ARM NEON. Except that this module is endian-neutral [in sense that -+# it can be compiled for either endianness] by courtesy of vld1.8's -+# neutrality. Initial version doesn't implement interface to OpenSSL, -+# only low-level primitives and unsupported entry points, just enough -+# to collect performance results, which for Cortex-A8 core are: -+# -+# encrypt 19.5 cycles per byte processed with 128-bit key -+# decrypt 24.0 cycles per byte processed with 128-bit key -+# key conv. 440 cycles per 128-bit key/0.18 of 8x block -+# -+# Snapdragon S4 encrypts byte in 17.6 cycles and decrypts in 22.6, -+# which is [much] worse than anticipated (for further details see -+# http://www.openssl.org/~appro/Snapdragon-S4.html). -+# -+# When comparing to x86_64 results keep in mind that NEON unit is -+# [mostly] single-issue and thus can't [fully] benefit from -+# instruction-level parallelism. And when comparing to aes-armv4 -+# results keep in mind key schedule conversion overhead (see -+# bsaes-x86_64.pl for further details)... -+# -+# <appro@openssl.org> -+ -+while (($output=shift) && ($output!~/^\w[\w\-]*\.\w+$/)) {} -+open STDOUT,">$output"; -+ -+my ($inp,$out,$len,$key)=("r0","r1","r2","r3"); -+my @XMM=map("q$_",(0..15)); -+ -+{ -+my ($key,$rounds,$const)=("r4","r5","r6"); -+ -+sub Dlo() { shift=~m|q([1]?[0-9])|?"d".($1*2):""; } -+sub Dhi() { shift=~m|q([1]?[0-9])|?"d".($1*2+1):""; } -+ -+sub Sbox { -+# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb -+# output in lsb > [b0, b1, b4, b6, b3, b7, b2, b5] < msb -+my @b=@_[0..7]; -+my @t=@_[8..11]; -+my @s=@_[12..15]; -+ &InBasisChange (@b); -+ &Inv_GF256 (@b[6,5,0,3,7,1,4,2],@t,@s); -+ &OutBasisChange (@b[7,1,4,2,6,5,0,3]); -+} -+ -+sub InBasisChange { -+# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb -+# output in lsb > [b6, b5, b0, b3, b7, b1, b4, b2] < msb -+my @b=@_[0..7]; -+$code.=<<___; -+ veor @b[2], @b[2], @b[1] -+ veor @b[5], @b[5], @b[6] -+ veor @b[3], @b[3], @b[0] -+ veor @b[6], @b[6], @b[2] -+ veor @b[5], @b[5], @b[0] -+ -+ veor @b[6], @b[6], @b[3] -+ veor @b[3], @b[3], @b[7] -+ veor @b[7], @b[7], @b[5] -+ veor @b[3], @b[3], @b[4] -+ veor @b[4], @b[4], @b[5] -+ -+ veor @b[2], @b[2], @b[7] -+ veor @b[3], @b[3], @b[1] -+ veor @b[1], @b[1], @b[5] -+___ -+} -+ -+sub OutBasisChange { -+# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb -+# output in lsb > [b6, b1, b2, b4, b7, b0, b3, b5] < msb -+my @b=@_[0..7]; -+$code.=<<___; -+ veor @b[0], @b[0], @b[6] -+ veor @b[1], @b[1], @b[4] -+ veor @b[4], @b[4], @b[6] -+ veor @b[2], @b[2], @b[0] -+ veor @b[6], @b[6], @b[1] -+ -+ veor @b[1], @b[1], @b[5] -+ veor @b[5], @b[5], @b[3] -+ veor @b[3], @b[3], @b[7] -+ veor @b[7], @b[7], @b[5] -+ veor @b[2], @b[2], @b[5] -+ -+ veor @b[4], @b[4], @b[7] -+___ -+} -+ -+sub InvSbox { -+# input in lsb > [b0, b1, b2, b3, b4, b5, b6, b7] < msb -+# output in lsb > [b0, b1, b6, b4, b2, b7, b3, b5] < msb -+my @b=@_[0..7]; -+my @t=@_[8..11]; -+my @s=@_[12..15]; -+ &InvInBasisChange (@b); -+ &Inv_GF256 (@b[5,1,2,6,3,7,0,4],@t,@s); -+ &InvOutBasisChange (@b[3,7,0,4,5,1,2,6]); -+} -+ -+sub InvInBasisChange { # OutBasisChange in reverse (with twist) -+my @b=@_[5,1,2,6,3,7,0,4]; -+$code.=<<___ -+ veor @b[1], @b[1], @b[7] -+ veor @b[4], @b[4], @b[7] -+ -+ veor @b[7], @b[7], @b[5] -+ veor @b[1], @b[1], @b[3] -+ veor @b[2], @b[2], @b[5] -+ veor @b[3], @b[3], @b[7] -+ -+ veor @b[6], @b[6], @b[1] -+ veor @b[2], @b[2], @b[0] -+ veor @b[5], @b[5], @b[3] -+ veor @b[4], @b[4], @b[6] -+ veor @b[0], @b[0], @b[6] -+ veor @b[1], @b[1], @b[4] -+___ -+} -+ -+sub InvOutBasisChange { # InBasisChange in reverse -+my @b=@_[2,5,7,3,6,1,0,4]; -+$code.=<<___; -+ veor @b[1], @b[1], @b[5] -+ veor @b[2], @b[2], @b[7] -+ -+ veor @b[3], @b[3], @b[1] -+ veor @b[4], @b[4], @b[5] -+ veor @b[7], @b[7], @b[5] -+ veor @b[3], @b[3], @b[4] -+ veor @b[5], @b[5], @b[0] -+ veor @b[3], @b[3], @b[7] -+ veor @b[6], @b[6], @b[2] -+ veor @b[2], @b[2], @b[1] -+ veor @b[6], @b[6], @b[3] -+ -+ veor @b[3], @b[3], @b[0] -+ veor @b[5], @b[5], @b[6] -+___ -+} -+ -+sub Mul_GF4 { -+#;************************************************************* -+#;* Mul_GF4: Input x0-x1,y0-y1 Output x0-x1 Temp t0 (8) * -+#;************************************************************* -+my ($x0,$x1,$y0,$y1,$t0,$t1)=@_; -+$code.=<<___; -+ veor $t0, $y0, $y1 -+ vand $t0, $t0, $x0 -+ veor $x0, $x0, $x1 -+ vand $t1, $x1, $y0 -+ vand $x0, $x0, $y1 -+ veor $x1, $t1, $t0 -+ veor $x0, $x0, $t1 -+___ -+} -+ -+sub Mul_GF4_N { # not used, see next subroutine -+# multiply and scale by N -+my ($x0,$x1,$y0,$y1,$t0)=@_; -+$code.=<<___; -+ veor $t0, $y0, $y1 -+ vand $t0, $t0, $x0 -+ veor $x0, $x0, $x1 -+ vand $x1, $x1, $y0 -+ vand $x0, $x0, $y1 -+ veor $x1, $x1, $x0 -+ veor $x0, $x0, $t0 -+___ -+} -+ -+sub Mul_GF4_N_GF4 { -+# interleaved Mul_GF4_N and Mul_GF4 -+my ($x0,$x1,$y0,$y1,$t0, -+ $x2,$x3,$y2,$y3,$t1)=@_; -+$code.=<<___; -+ veor $t0, $y0, $y1 -+ veor $t1, $y2, $y3 -+ vand $t0, $t0, $x0 -+ vand $t1, $t1, $x2 -+ veor $x0, $x0, $x1 -+ veor $x2, $x2, $x3 -+ vand $x1, $x1, $y0 -+ vand $x3, $x3, $y2 -+ vand $x0, $x0, $y1 -+ vand $x2, $x2, $y3 -+ veor $x1, $x1, $x0 -+ veor $x2, $x2, $x3 -+ veor $x0, $x0, $t0 -+ veor $x3, $x3, $t1 -+___ -+} -+sub Mul_GF16_2 { -+my @x=@_[0..7]; -+my @y=@_[8..11]; -+my @t=@_[12..15]; -+$code.=<<___; -+ veor @t[0], @x[0], @x[2] -+ veor @t[1], @x[1], @x[3] -+___ -+ &Mul_GF4 (@x[0], @x[1], @y[0], @y[1], @t[2..3]); -+$code.=<<___; -+ veor @y[0], @y[0], @y[2] -+ veor @y[1], @y[1], @y[3] -+___ -+ Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], -+ @x[2], @x[3], @y[2], @y[3], @t[2]); -+$code.=<<___; -+ veor @x[0], @x[0], @t[0] -+ veor @x[2], @x[2], @t[0] -+ veor @x[1], @x[1], @t[1] -+ veor @x[3], @x[3], @t[1] -+ -+ veor @t[0], @x[4], @x[6] -+ veor @t[1], @x[5], @x[7] -+___ -+ &Mul_GF4_N_GF4 (@t[0], @t[1], @y[0], @y[1], @t[3], -+ @x[6], @x[7], @y[2], @y[3], @t[2]); -+$code.=<<___; -+ veor @y[0], @y[0], @y[2] -+ veor @y[1], @y[1], @y[3] -+___ -+ &Mul_GF4 (@x[4], @x[5], @y[0], @y[1], @t[2..3]); -+$code.=<<___; -+ veor @x[4], @x[4], @t[0] -+ veor @x[6], @x[6], @t[0] -+ veor @x[5], @x[5], @t[1] -+ veor @x[7], @x[7], @t[1] -+___ -+} -+sub Inv_GF256 { -+#;******************************************************************** -+#;* Inv_GF256: Input x0-x7 Output x0-x7 Temp t0-t3,s0-s3 (144) * -+#;******************************************************************** -+my @x=@_[0..7]; -+my @t=@_[8..11]; -+my @s=@_[12..15]; -+# direct optimizations from hardware -+$code.=<<___; -+ veor @t[3], @x[4], @x[6] -+ veor @t[2], @x[5], @x[7] -+ veor @t[1], @x[1], @x[3] -+ veor @s[1], @x[7], @x[6] -+ vmov @t[0], @t[2] -+ veor @s[0], @x[0], @x[2] -+ -+ vorr @t[2], @t[2], @t[1] -+ veor @s[3], @t[3], @t[0] -+ vand @s[2], @t[3], @s[0] -+ vorr @t[3], @t[3], @s[0] -+ veor @s[0], @s[0], @t[1] -+ vand @t[0], @t[0], @t[1] -+ veor @t[1], @x[3], @x[2] -+ vand @s[3], @s[3], @s[0] -+ vand @s[1], @s[1], @t[1] -+ veor @t[1], @x[4], @x[5] -+ veor @s[0], @x[1], @x[0] -+ veor @t[3], @t[3], @s[1] -+ veor @t[2], @t[2], @s[1] -+ vand @s[1], @t[1], @s[0] -+ vorr @t[1], @t[1], @s[0] -+ veor @t[3], @t[3], @s[3] -+ veor @t[0], @t[0], @s[1] -+ veor @t[2], @t[2], @s[2] -+ veor @t[1], @t[1], @s[3] -+ veor @t[0], @t[0], @s[2] -+ vand @s[0], @x[7], @x[3] -+ veor @t[1], @t[1], @s[2] -+ vand @s[1], @x[6], @x[2] -+ vand @s[2], @x[5], @x[1] -+ vorr @s[3], @x[4], @x[0] -+ veor @t[3], @t[3], @s[0] -+ veor @t[1], @t[1], @s[2] -+ veor @t[0], @t[0], @s[3] -+ veor @t[2], @t[2], @s[1] -+ -+ @ Inv_GF16 \t0, \t1, \t2, \t3, \s0, \s1, \s2, \s3 -+ -+ @ new smaller inversion -+ -+ vand @s[2], @t[3], @t[1] -+ vmov @s[0], @t[0] -+ -+ veor @s[1], @t[2], @s[2] -+ veor @s[3], @t[0], @s[2] -+ veor @s[2], @t[0], @s[2] @ @s[2]=@s[3] -+ -+ vbsl @s[1], @t[1], @t[0] -+ vbsl @s[3], @t[3], @t[2] -+ veor @t[3], @t[3], @t[2] -+ -+ vbsl @s[0], @s[1], @s[2] -+ vbsl @t[0], @s[2], @s[1] -+ -+ vand @s[2], @s[0], @s[3] -+ veor @t[1], @t[1], @t[0] -+ -+ veor @s[2], @s[2], @t[3] -+___ -+# output in s3, s2, s1, t1 -+ -+# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \t2, \t3, \t0, \t1, \s0, \s1, \s2, \s3 -+ -+# Mul_GF16_2 \x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3 -+ &Mul_GF16_2(@x,@s[3,2,1],@t[1],@s[0],@t[0,2,3]); -+ -+### output msb > [x3,x2,x1,x0,x7,x6,x5,x4] < lsb -+} -+ -+# AES linear components -+ -+sub ShiftRows { -+my @x=@_[0..7]; -+my @t=@_[8..11]; -+my $mask=pop; -+$code.=<<___; -+ vldmia $key!, {@t[0]-@t[3]} -+ veor @t[0], @t[0], @x[0] -+ veor @t[1], @t[1], @x[1] -+ vtbl.8 `&Dlo(@x[0])`, {@t[0]}, `&Dlo($mask)` -+ vtbl.8 `&Dhi(@x[0])`, {@t[0]}, `&Dhi($mask)` -+ vldmia $key!, {@t[0]} -+ veor @t[2], @t[2], @x[2] -+ vtbl.8 `&Dlo(@x[1])`, {@t[1]}, `&Dlo($mask)` -+ vtbl.8 `&Dhi(@x[1])`, {@t[1]}, `&Dhi($mask)` -+ vldmia $key!, {@t[1]} -+ veor @t[3], @t[3], @x[3] -+ vtbl.8 `&Dlo(@x[2])`, {@t[2]}, `&Dlo($mask)` -+ vtbl.8 `&Dhi(@x[2])`, {@t[2]}, `&Dhi($mask)` -+ vldmia $key!, {@t[2]} -+ vtbl.8 `&Dlo(@x[3])`, {@t[3]}, `&Dlo($mask)` -+ vtbl.8 `&Dhi(@x[3])`, {@t[3]}, `&Dhi($mask)` -+ vldmia $key!, {@t[3]} -+ veor @t[0], @t[0], @x[4] -+ veor @t[1], @t[1], @x[5] -+ vtbl.8 `&Dlo(@x[4])`, {@t[0]}, `&Dlo($mask)` -+ vtbl.8 `&Dhi(@x[4])`, {@t[0]}, `&Dhi($mask)` -+ veor @t[2], @t[2], @x[6] -+ vtbl.8 `&Dlo(@x[5])`, {@t[1]}, `&Dlo($mask)` -+ vtbl.8 `&Dhi(@x[5])`, {@t[1]}, `&Dhi($mask)` -+ veor @t[3], @t[3], @x[7] -+ vtbl.8 `&Dlo(@x[6])`, {@t[2]}, `&Dlo($mask)` -+ vtbl.8 `&Dhi(@x[6])`, {@t[2]}, `&Dhi($mask)` -+ vtbl.8 `&Dlo(@x[7])`, {@t[3]}, `&Dlo($mask)` -+ vtbl.8 `&Dhi(@x[7])`, {@t[3]}, `&Dhi($mask)` -+___ -+} -+ -+sub MixColumns { -+# modified to emit output in order suitable for feeding back to aesenc[last] -+my @x=@_[0..7]; -+my @t=@_[8..15]; -+$code.=<<___; -+ vext.8 @t[0], @x[0], @x[0], #12 @ x0 <<< 32 -+ vext.8 @t[1], @x[1], @x[1], #12 -+ veor @x[0], @x[0], @t[0] @ x0 ^ (x0 <<< 32) -+ vext.8 @t[2], @x[2], @x[2], #12 -+ veor @x[1], @x[1], @t[1] -+ vext.8 @t[3], @x[3], @x[3], #12 -+ veor @x[2], @x[2], @t[2] -+ vext.8 @t[4], @x[4], @x[4], #12 -+ veor @x[3], @x[3], @t[3] -+ vext.8 @t[5], @x[5], @x[5], #12 -+ veor @x[4], @x[4], @t[4] -+ vext.8 @t[6], @x[6], @x[6], #12 -+ veor @x[5], @x[5], @t[5] -+ vext.8 @t[7], @x[7], @x[7], #12 -+ veor @x[6], @x[6], @t[6] -+ -+ veor @t[1], @t[1], @x[0] -+ veor @x[7], @x[7], @t[7] -+ vext.8 @x[0], @x[0], @x[0], #8 @ (x0 ^ (x0 <<< 32)) <<< 64) -+ veor @t[2], @t[2], @x[1] -+ veor @t[0], @t[0], @x[7] -+ veor @t[1], @t[1], @x[7] -+ vext.8 @x[1], @x[1], @x[1], #8 -+ veor @t[5], @t[5], @x[4] -+ veor @x[0], @x[0], @t[0] -+ veor @t[6], @t[6], @x[5] -+ veor @x[1], @x[1], @t[1] -+ vext.8 @t[0], @x[4], @x[4], #8 -+ veor @t[4], @t[4], @x[3] -+ vext.8 @t[1], @x[5], @x[5], #8 -+ veor @t[7], @t[7], @x[6] -+ vext.8 @x[4], @x[3], @x[3], #8 -+ veor @t[3], @t[3], @x[2] -+ vext.8 @x[5], @x[7], @x[7], #8 -+ veor @t[4], @t[4], @x[7] -+ vext.8 @x[3], @x[6], @x[6], #8 -+ veor @t[3], @t[3], @x[7] -+ vext.8 @x[6], @x[2], @x[2], #8 -+ veor @x[7], @t[1], @t[5] -+ veor @x[2], @t[0], @t[4] -+ -+ veor @x[4], @x[4], @t[3] -+ veor @x[5], @x[5], @t[7] -+ veor @x[3], @x[3], @t[6] -+ @ vmov @x[2], @t[0] -+ veor @x[6], @x[6], @t[2] -+ @ vmov @x[7], @t[1] -+___ -+} -+ -+sub InvMixColumns { -+my @x=@_[0..7]; -+my @t=@_[8..15]; -+ -+$code.=<<___; -+ @ multiplication by 0x0e -+ vext.8 @t[7], @x[7], @x[7], #12 -+ vmov @t[2], @x[2] -+ veor @x[2], @x[2], @x[5] @ 2 5 -+ veor @x[7], @x[7], @x[5] @ 7 5 -+ vext.8 @t[0], @x[0], @x[0], #12 -+ vmov @t[5], @x[5] -+ veor @x[5], @x[5], @x[0] @ 5 0 [1] -+ veor @x[0], @x[0], @x[1] @ 0 1 -+ vext.8 @t[1], @x[1], @x[1], #12 -+ veor @x[1], @x[1], @x[2] @ 1 25 -+ veor @x[0], @x[0], @x[6] @ 01 6 [2] -+ vext.8 @t[3], @x[3], @x[3], #12 -+ veor @x[1], @x[1], @x[3] @ 125 3 [4] -+ veor @x[2], @x[2], @x[0] @ 25 016 [3] -+ veor @x[3], @x[3], @x[7] @ 3 75 -+ veor @x[7], @x[7], @x[6] @ 75 6 [0] -+ vext.8 @t[6], @x[6], @x[6], #12 -+ vmov @t[4], @x[4] -+ veor @x[6], @x[6], @x[4] @ 6 4 -+ veor @x[4], @x[4], @x[3] @ 4 375 [6] -+ veor @x[3], @x[3], @x[7] @ 375 756=36 -+ veor @x[6], @x[6], @t[5] @ 64 5 [7] -+ veor @x[3], @x[3], @t[2] @ 36 2 -+ vext.8 @t[5], @t[5], @t[5], #12 -+ veor @x[3], @x[3], @t[4] @ 362 4 [5] -+___ -+ my @y = @x[7,5,0,2,1,3,4,6]; -+$code.=<<___; -+ @ multiplication by 0x0b -+ veor @y[1], @y[1], @y[0] -+ veor @y[0], @y[0], @t[0] -+ vext.8 @t[2], @t[2], @t[2], #12 -+ veor @y[1], @y[1], @t[1] -+ veor @y[0], @y[0], @t[5] -+ vext.8 @t[4], @t[4], @t[4], #12 -+ veor @y[1], @y[1], @t[6] -+ veor @y[0], @y[0], @t[7] -+ veor @t[7], @t[7], @t[6] @ clobber t[7] -+ -+ veor @y[3], @y[3], @t[0] -+ veor @y[1], @y[1], @y[0] -+ vext.8 @t[0], @t[0], @t[0], #12 -+ veor @y[2], @y[2], @t[1] -+ veor @y[4], @y[4], @t[1] -+ vext.8 @t[1], @t[1], @t[1], #12 -+ veor @y[2], @y[2], @t[2] -+ veor @y[3], @y[3], @t[2] -+ veor @y[5], @y[5], @t[2] -+ veor @y[2], @y[2], @t[7] -+ vext.8 @t[2], @t[2], @t[2], #12 -+ veor @y[3], @y[3], @t[3] -+ veor @y[6], @y[6], @t[3] -+ veor @y[4], @y[4], @t[3] -+ veor @y[7], @y[7], @t[4] -+ vext.8 @t[3], @t[3], @t[3], #12 -+ veor @y[5], @y[5], @t[4] -+ veor @y[7], @y[7], @t[7] -+ veor @t[7], @t[7], @t[5] @ clobber t[7] even more -+ veor @y[3], @y[3], @t[5] -+ veor @y[4], @y[4], @t[4] -+ -+ veor @y[5], @y[5], @t[7] -+ vext.8 @t[4], @t[4], @t[4], #12 -+ veor @y[6], @y[6], @t[7] -+ veor @y[4], @y[4], @t[7] -+ -+ veor @t[7], @t[7], @t[5] -+ vext.8 @t[5], @t[5], @t[5], #12 -+ -+ @ multiplication by 0x0d -+ veor @y[4], @y[4], @y[7] -+ veor @t[7], @t[7], @t[6] @ restore t[7] -+ veor @y[7], @y[7], @t[4] -+ vext.8 @t[6], @t[6], @t[6], #12 -+ veor @y[2], @y[2], @t[0] -+ veor @y[7], @y[7], @t[5] -+ vext.8 @t[7], @t[7], @t[7], #12 -+ veor @y[2], @y[2], @t[2] -+ -+ veor @y[3], @y[3], @y[1] -+ veor @y[1], @y[1], @t[1] -+ veor @y[0], @y[0], @t[0] -+ veor @y[3], @y[3], @t[0] -+ veor @y[1], @y[1], @t[5] -+ veor @y[0], @y[0], @t[5] -+ vext.8 @t[0], @t[0], @t[0], #12 -+ veor @y[1], @y[1], @t[7] -+ veor @y[0], @y[0], @t[6] -+ veor @y[3], @y[3], @y[1] -+ veor @y[4], @y[4], @t[1] -+ vext.8 @t[1], @t[1], @t[1], #12 -+ -+ veor @y[7], @y[7], @t[7] -+ veor @y[4], @y[4], @t[2] -+ veor @y[5], @y[5], @t[2] -+ veor @y[2], @y[2], @t[6] -+ veor @t[6], @t[6], @t[3] @ clobber t[6] -+ vext.8 @t[2], @t[2], @t[2], #12 -+ veor @y[4], @y[4], @y[7] -+ veor @y[3], @y[3], @t[6] -+ -+ veor @y[6], @y[6], @t[6] -+ veor @y[5], @y[5], @t[5] -+ vext.8 @t[5], @t[5], @t[5], #12 -+ veor @y[6], @y[6], @t[4] -+ vext.8 @t[4], @t[4], @t[4], #12 -+ veor @y[5], @y[5], @t[6] -+ veor @y[6], @y[6], @t[7] -+ vext.8 @t[7], @t[7], @t[7], #12 -+ veor @t[6], @t[6], @t[3] @ restore t[6] -+ vext.8 @t[3], @t[3], @t[3], #12 -+ -+ @ multiplication by 0x09 -+ veor @y[4], @y[4], @y[1] -+ veor @t[1], @t[1], @y[1] @ t[1]=y[1] -+ veor @t[0], @t[0], @t[5] @ clobber t[0] -+ vext.8 @t[6], @t[6], @t[6], #12 -+ veor @t[1], @t[1], @t[5] -+ veor @y[3], @y[3], @t[0] -+ veor @t[0], @t[0], @y[0] @ t[0]=y[0] -+ veor @t[1], @t[1], @t[6] -+ veor @t[6], @t[6], @t[7] @ clobber t[6] -+ veor @y[4], @y[4], @t[1] -+ veor @y[7], @y[7], @t[4] -+ veor @y[6], @y[6], @t[3] -+ veor @y[5], @y[5], @t[2] -+ veor @t[4], @t[4], @y[4] @ t[4]=y[4] -+ veor @t[3], @t[3], @y[3] @ t[3]=y[3] -+ veor @t[5], @t[5], @y[5] @ t[5]=y[5] -+ veor @t[2], @t[2], @y[2] @ t[2]=y[2] -+ veor @t[3], @t[3], @t[7] -+ veor @XMM[5], @t[5], @t[6] -+ veor @XMM[6], @t[6], @y[6] @ t[6]=y[6] -+ veor @XMM[2], @t[2], @t[6] -+ veor @XMM[7], @t[7], @y[7] @ t[7]=y[7] -+ -+ vmov @XMM[0], @t[0] -+ vmov @XMM[1], @t[1] -+ @ vmov @XMM[2], @t[2] -+ vmov @XMM[3], @t[3] -+ vmov @XMM[4], @t[4] -+ @ vmov @XMM[5], @t[5] -+ @ vmov @XMM[6], @t[6] -+ @ vmov @XMM[7], @t[7] -+___ -+} -+ -+sub swapmove { -+my ($a,$b,$n,$mask,$t)=@_; -+$code.=<<___; -+ vshr.u64 $t, $b, #$n -+ veor $t, $t, $a -+ vand $t, $t, $mask -+ veor $a, $a, $t -+ vshl.u64 $t, $t, #$n -+ veor $b, $b, $t -+___ -+} -+sub swapmove2x { -+my ($a0,$b0,$a1,$b1,$n,$mask,$t0,$t1)=@_; -+$code.=<<___; -+ vshr.u64 $t0, $b0, #$n -+ vshr.u64 $t1, $b1, #$n -+ veor $t0, $t0, $a0 -+ veor $t1, $t1, $a1 -+ vand $t0, $t0, $mask -+ vand $t1, $t1, $mask -+ veor $a0, $a0, $t0 -+ vshl.u64 $t0, $t0, #$n -+ veor $a1, $a1, $t1 -+ vshl.u64 $t1, $t1, #$n -+ veor $b0, $b0, $t0 -+ veor $b1, $b1, $t1 -+___ -+} -+ -+sub bitslice { -+my @x=reverse(@_[0..7]); -+my ($t0,$t1,$t2,$t3)=@_[8..11]; -+$code.=<<___; -+ vmov.i8 $t0,#0x55 @ compose .LBS0 -+ vmov.i8 $t1,#0x33 @ compose .LBS1 -+___ -+ &swapmove2x(@x[0,1,2,3],1,$t0,$t2,$t3); -+ &swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); -+$code.=<<___; -+ vmov.i8 $t0,#0x0f @ compose .LBS2 -+___ -+ &swapmove2x(@x[0,2,1,3],2,$t1,$t2,$t3); -+ &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); -+ -+ &swapmove2x(@x[0,4,1,5],4,$t0,$t2,$t3); -+ &swapmove2x(@x[2,6,3,7],4,$t0,$t2,$t3); -+} -+ -+$code.=<<___; -+ .text -+ .code 32 -+ .fpu neon -+ -+ .align 4 -+.LM0ISR: @ InvShiftRows constants -+ .quad 0x0a0e0206070b0f03, 0x0004080c0d010509 -+.LISR: -+ .quad 0x0504070602010003, 0x0f0e0d0c080b0a09 -+.LISRM0: -+ .quad 0x01040b0e0205080f, 0x0306090c00070a0d -+ -+ .type _bsaes_decrypt8,%function -+_bsaes_decrypt8: -+ adr $const,.LM0ISR -+ vldmia $key!, {@XMM[9]} @ round 0 key -+ -+ vldmia $const!, {@XMM[8]} @ .LM0ISR -+ veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key -+ veor @XMM[11], @XMM[1], @XMM[9] -+ vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])` -+ veor @XMM[12], @XMM[2], @XMM[9] -+ vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])` -+ veor @XMM[13], @XMM[3], @XMM[9] -+ vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])` -+ veor @XMM[14], @XMM[4], @XMM[9] -+ vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])` -+ veor @XMM[15], @XMM[5], @XMM[9] -+ vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])` -+ veor @XMM[10], @XMM[6], @XMM[9] -+ vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])` -+ veor @XMM[11], @XMM[7], @XMM[9] -+ vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])` -+ vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])` -+___ -+ &bitslice (@XMM[0..7, 8..11]); -+$code.=<<___; -+ sub $rounds,$rounds,#1 -+ b .Ldec_sbox -+ -+ .align 5 -+.Ldec_loop: -+___ -+ &ShiftRows (@XMM[0..7, 8..12]); -+$code.=".Ldec_sbox:\n"; -+ &InvSbox (@XMM[0..7, 8..15]); -+$code.=<<___; -+ subs $rounds,$rounds,#1 -+ bcc .Ldec_done -+___ -+ &InvMixColumns (@XMM[0,1,6,4,2,7,3,5, 8..15]); -+$code.=<<___; -+ vldmia $const, {@XMM[12]} @ .LISR -+ addeq $const,$const,#0x10 -+ bne .Ldec_loop -+ vldmia $const, {@XMM[12]} @ .LISRM0 -+ b .Ldec_loop -+.align 4 -+.Ldec_done: -+___ -+ &bitslice (@XMM[0,1,6,4,2,7,3,5, 8..11]); -+$code.=<<___; -+ vldmia $key, {@XMM[8]} @ last round key -+ veor @XMM[6], @XMM[6], @XMM[8] -+ veor @XMM[4], @XMM[4], @XMM[8] -+ veor @XMM[2], @XMM[2], @XMM[8] -+ veor @XMM[7], @XMM[7], @XMM[8] -+ veor @XMM[3], @XMM[3], @XMM[8] -+ veor @XMM[5], @XMM[5], @XMM[8] -+ veor @XMM[0], @XMM[0], @XMM[8] -+ veor @XMM[1], @XMM[1], @XMM[8] -+ bx lr -+.size _bsaes_decrypt8,.-_bsaes_decrypt8 -+ -+ .align 4 -+_bsaes_const: -+.LM0SR: @ ShiftRows constants -+ .quad 0x0a0e02060f03070b, 0x0004080c05090d01 -+.LSR: -+ .quad 0x0504070600030201, 0x0f0e0d0c0a09080b -+.LSRM0: -+ .quad 0x0304090e00050a0f, 0x01060b0c0207080d -+ -+ .asciz "Bit-sliced AES for NEON, CRYPTOGAMS by <appro\@openssl.org>" -+ .size _bsaes_const,.-_bsaes_const -+ -+ .align 5 -+ .type _bsaes_encrypt8,%function -+_bsaes_encrypt8: -+ adr $const,.LM0SR -+ vldmia $key!, {@XMM[9]} @ round 0 key -+ -+ vldmia $const!, {@XMM[8]} @ .LM0SR -+ veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key -+ veor @XMM[11], @XMM[1], @XMM[9] -+ vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])` -+ veor @XMM[12], @XMM[2], @XMM[9] -+ vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])` -+ veor @XMM[13], @XMM[3], @XMM[9] -+ vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])` -+ veor @XMM[14], @XMM[4], @XMM[9] -+ vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])` -+ veor @XMM[15], @XMM[5], @XMM[9] -+ vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])` -+ veor @XMM[10], @XMM[6], @XMM[9] -+ vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])` -+ veor @XMM[11], @XMM[7], @XMM[9] -+ vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])` -+ vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])` -+_bsaes_encrypt8_bitslice: -+___ -+ &bitslice (@XMM[0..7, 8..11]); -+$code.=<<___; -+ sub $rounds,$rounds,#1 -+ b .Lenc_sbox -+.align 4 -+.Lenc_loop: -+___ -+ &ShiftRows (@XMM[0..7, 8..12]); -+$code.=".Lenc_sbox:\n"; -+ &Sbox (@XMM[0..7, 8..15]); -+$code.=<<___; -+ subs $rounds,$rounds,#1 -+ bcc .Lenc_done -+___ -+ &MixColumns (@XMM[0,1,4,6,3,7,2,5, 8..15]); -+$code.=<<___; -+ vldmia $const, {@XMM[12]} @ .LSR -+ addeq $const,$const,#0x10 -+ bne .Lenc_loop -+ vldmia $const, {@XMM[12]} @ .LSRM0 -+ b .Lenc_loop -+.align 4 -+.Lenc_done: -+___ -+ # output in lsb > [t0, t1, t4, t6, t3, t7, t2, t5] < msb -+ &bitslice (@XMM[0,1,4,6,3,7,2,5, 8..11]); -+$code.=<<___; -+ vldmia $key, {@XMM[8]} @ last round key -+ veor @XMM[4], @XMM[4], @XMM[8] -+ veor @XMM[6], @XMM[6], @XMM[8] -+ veor @XMM[3], @XMM[3], @XMM[8] -+ veor @XMM[7], @XMM[7], @XMM[8] -+ veor @XMM[2], @XMM[2], @XMM[8] -+ veor @XMM[5], @XMM[5], @XMM[8] -+ veor @XMM[0], @XMM[0], @XMM[8] -+ veor @XMM[1], @XMM[1], @XMM[8] -+ bx lr -+.size _bsaes_encrypt8,.-_bsaes_encrypt8 -+___ -+} -+{ -+my ($out,$inp,$rounds,$const)=("r12","r4","r5","r6"); -+ -+sub bitslice_key { -+my @x=reverse(@_[0..7]); -+my ($bs0,$bs1,$bs2,$t2,$t3)=@_[8..12]; -+ -+ &swapmove (@x[0,1],1,$bs0,$t2,$t3); -+$code.=<<___; -+ @ &swapmove(@x[2,3],1,$t0,$t2,$t3); -+ vmov @x[2], @x[0] -+ vmov @x[3], @x[1] -+___ -+ #&swapmove2x(@x[4,5,6,7],1,$t0,$t2,$t3); -+ -+ &swapmove2x (@x[0,2,1,3],2,$bs1,$t2,$t3); -+$code.=<<___; -+ @ &swapmove2x(@x[4,6,5,7],2,$t1,$t2,$t3); -+ vmov @x[4], @x[0] -+ vmov @x[6], @x[2] -+ vmov @x[5], @x[1] -+ vmov @x[7], @x[3] -+___ -+ &swapmove2x (@x[0,4,1,5],4,$bs2,$t2,$t3); -+ &swapmove2x (@x[2,6,3,7],4,$bs2,$t2,$t3); -+} -+ -+$code.=<<___; -+ .align 4 -+.LM0: -+ .quad 0x02060a0e03070b0f, 0x0004080c0105090d -+ -+ .type _bsaes_key_convert,%function -+_bsaes_key_convert: -+ adr $const,.LM0 -+ vld1.8 {@XMM[7]}, [$inp]! @ load round 0 key -+ vld1.8 {@XMM[15]}, [$inp]! @ load round 1 key -+ -+ vmov.i8 @XMM[8], #0x01 @ bit masks -+ vmov.i8 @XMM[9], #0x02 -+ vmov.i8 @XMM[10], #0x04 -+ vmov.i8 @XMM[11], #0x08 -+ vmov.i8 @XMM[12], #0x10 -+ vmov.i8 @XMM[13], #0x20 -+ vldmia $const, {@XMM[14]} @ .LM0 -+ -+#ifdef __ARMEL__ -+ vrev32.8 @XMM[7], @XMM[7] -+ vrev32.8 @XMM[15], @XMM[15] -+#endif -+ sub $rounds,$rounds,#1 -+ vstmia $out!, {@XMM[7]} @ save round 0 key -+ b .Lkey_loop -+ -+ .align 5 -+.Lkey_loop: -+ vtbl.8 `&Dlo(@XMM[7])`,{@XMM[15]},`&Dlo(@XMM[14])` -+ vtbl.8 `&Dhi(@XMM[7])`,{@XMM[15]},`&Dhi(@XMM[14])` -+ vmov.i8 @XMM[6], #0x40 -+ vmov.i8 @XMM[15], #0x80 -+ -+ vtst.8 @XMM[0], @XMM[7], @XMM[8] -+ vtst.8 @XMM[1], @XMM[7], @XMM[9] -+ vtst.8 @XMM[2], @XMM[7], @XMM[10] -+ vtst.8 @XMM[3], @XMM[7], @XMM[11] -+ vtst.8 @XMM[4], @XMM[7], @XMM[12] -+ vtst.8 @XMM[5], @XMM[7], @XMM[13] -+ vtst.8 @XMM[6], @XMM[7], @XMM[6] -+ vtst.8 @XMM[7], @XMM[7], @XMM[15] -+ vld1.8 {@XMM[15]}, [$inp]! @ load next round key -+ vmvn @XMM[0], @XMM[0] @ "pnot" -+ vmvn @XMM[1], @XMM[1] -+ vmvn @XMM[5], @XMM[5] -+ vmvn @XMM[6], @XMM[6] -+#ifdef __ARMEL__ -+ vrev32.8 @XMM[15], @XMM[15] -+#endif -+ subs $rounds,$rounds,#1 -+ vstmia $out!,{@XMM[0]-@XMM[7]} @ write bit-sliced round key -+ bne .Lkey_loop -+ -+ vmov.i8 @XMM[7],#0x63 @ compose .L63 -+ @ don't save last round key -+ bx lr -+.size _bsaes_key_convert,.-_bsaes_key_convert -+___ -+} -+ -+if (0) { # following four functions are unsupported interface -+ # used for benchmarking... -+$code.=<<___; -+.globl bsaes_enc_key_convert -+.type bsaes_enc_key_convert,%function -+.align 4 -+bsaes_enc_key_convert: -+ stmdb sp!,{r4-r6,lr} -+ vstmdb sp!,{d8-d15} @ ABI specification says so -+ -+ ldr r5,[$inp,#240] @ pass rounds -+ mov r4,$inp @ pass key -+ mov r12,$out @ pass key schedule -+ bl _bsaes_key_convert -+ veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key -+ vstmia r12, {@XMM[7]} @ save last round key -+ -+ vldmia sp!,{d8-d15} -+ ldmia sp!,{r4-r6,pc} -+.size bsaes_enc_key_convert,.-bsaes_enc_key_convert -+ -+.globl bsaes_encrypt_128 -+.type bsaes_encrypt_128,%function -+.align 4 -+bsaes_encrypt_128: -+ stmdb sp!,{r4-r6,lr} -+ vstmdb sp!,{d8-d15} @ ABI specification says so -+.Lenc128_loop: -+ vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input -+ vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! -+ mov r4,$key @ pass the key -+ vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! -+ mov r5,#10 @ pass rounds -+ vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! -+ -+ bl _bsaes_encrypt8 -+ -+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output -+ vst1.8 {@XMM[4]}, [$out]! -+ vst1.8 {@XMM[6]}, [$out]! -+ vst1.8 {@XMM[3]}, [$out]! -+ vst1.8 {@XMM[7]}, [$out]! -+ vst1.8 {@XMM[2]}, [$out]! -+ subs $len,$len,#0x80 -+ vst1.8 {@XMM[5]}, [$out]! -+ bhi .Lenc128_loop -+ -+ vldmia sp!,{d8-d15} -+ ldmia sp!,{r4-r6,pc} -+.size bsaes_encrypt_128,.-bsaes_encrypt_128 -+ -+.globl bsaes_dec_key_convert -+.type bsaes_dec_key_convert,%function -+.align 4 -+bsaes_dec_key_convert: -+ stmdb sp!,{r4-r6,lr} -+ vstmdb sp!,{d8-d15} @ ABI specification says so -+ -+ ldr r5,[$inp,#240] @ pass rounds -+ mov r4,$inp @ pass key -+ mov r12,$out @ pass key schedule -+ bl _bsaes_key_convert -+ vldmia $out, {@XMM[6]} -+ vstmia r12, {@XMM[15]} @ save last round key -+ veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key -+ vstmia $out, {@XMM[7]} -+ -+ vldmia sp!,{d8-d15} -+ ldmia sp!,{r4-r6,pc} -+.size bsaes_dec_key_convert,.-bsaes_dec_key_convert -+ -+.globl bsaes_decrypt_128 -+.type bsaes_decrypt_128,%function -+.align 4 -+bsaes_decrypt_128: -+ stmdb sp!,{r4-r6,lr} -+ vstmdb sp!,{d8-d15} @ ABI specification says so -+.Ldec128_loop: -+ vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input -+ vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! -+ mov r4,$key @ pass the key -+ vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! -+ mov r5,#10 @ pass rounds -+ vld1.8 {@XMM[6]-@XMM[7]}, [$inp]! -+ -+ bl _bsaes_decrypt8 -+ -+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output -+ vst1.8 {@XMM[6]}, [$out]! -+ vst1.8 {@XMM[4]}, [$out]! -+ vst1.8 {@XMM[2]}, [$out]! -+ vst1.8 {@XMM[7]}, [$out]! -+ vst1.8 {@XMM[3]}, [$out]! -+ subs $len,$len,#0x80 -+ vst1.8 {@XMM[5]}, [$out]! -+ bhi .Ldec128_loop -+ -+ vldmia sp!,{d8-d15} -+ ldmia sp!,{r4-r6,pc} -+.size bsaes_decrypt_128,.-bsaes_decrypt_128 -+___ -+} -+ -+{ -+my ($inp,$out,$len,$key,$keysched,$rounds,$iv)=("r0","r1","r2","r3","sp","r7","r8"); -+ -+$code.=<<___; -+ .align 5 -+ .extern AES_cbc_encrypt -+ .extern AES_decrypt -+ .global bsaes_cbc_encrypt -+ .type bsaes_cbc_encrypt,%function -+bsaes_cbc_encrypt: -+ cmp $len, #128 -+ blo AES_cbc_encrypt -+ -+ @ it is up to the caller to make sure we are called with enc == 0 -+ -+ stmdb sp!, {r4-r10, lr} -+ vstmdb sp!, {d8-d15} @ ABI specification says so -+ ldr $iv, [sp, #0x60] @ IV is 1st arg on the stack -+ lsr $len, #4 @ len in 16 byte blocks -+ sub sp, #0x10 @ scratch space to carry over the IV -+ mov r10, sp @ save sp -+ -+ @ allocate the key schedule on the stack -+ ldr $rounds, [r3, #240] @ get # of rounds -+ sub r14, sp, $rounds, lsl #7 @ 128 bytes per inner round key -+ add sp, r14, #`128-32` @ size of bit-sliced key schedule -+ -+ @ populate the key schedule -+ mov r4, $key @ pass key -+ mov r5, $rounds @ pass # of rounds -+ mov r12, $keysched @ pass key schedule -+ bl _bsaes_key_convert -+ vldmia $keysched, {@XMM[6]} -+ vstmia r12, {@XMM[15]} @ save last round key -+ veor @XMM[7], @XMM[7], @XMM[6] @ fix up round 0 key -+ vstmia $keysched, {@XMM[7]} -+ -+ vld1.8 {@XMM[15]}, [$iv] @ load IV -+ mov $iv, r10 -+ -+.Lcbc_dec_loop: -+ subs $len, #0x8 -+ bmi .Lcbc_dec_loop_finish -+ -+ vld1.8 {@XMM[0]-@XMM[1]}, [$inp]! @ load input -+ vld1.8 {@XMM[2]-@XMM[3]}, [$inp]! -+ mov r4, $keysched @ pass the key -+ vld1.8 {@XMM[4]-@XMM[5]}, [$inp]! -+ mov r5, $rounds -+ vld1.8 {@XMM[6]-@XMM[7]}, [$inp] -+ sub $inp, #0x60 -+ vst1.8 {@XMM[15]}, [$iv] @ put aside IV -+ -+ bl _bsaes_decrypt8 -+ -+ vld1.8 {@XMM[14]}, [$iv] @ load IV -+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV -+ -+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input -+ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! -+ veor @XMM[1], @XMM[1], @XMM[8] -+ veor @XMM[6], @XMM[6], @XMM[9] -+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output -+ vst1.8 {@XMM[6]}, [$out]! -+ vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! -+ veor @XMM[4], @XMM[4], @XMM[10] -+ veor @XMM[2], @XMM[2], @XMM[11] -+ vst1.8 {@XMM[4]}, [$out]! -+ vst1.8 {@XMM[2]}, [$out]! -+ vld1.8 {@XMM[14]-@XMM[15]}, [$inp]! -+ veor @XMM[7], @XMM[7], @XMM[12] -+ veor @XMM[3], @XMM[3], @XMM[13] -+ veor @XMM[5], @XMM[5], @XMM[14] -+ vst1.8 {@XMM[7]}, [$out]! -+ vst1.8 {@XMM[3]}, [$out]! -+ vst1.8 {@XMM[5]}, [$out]! -+ -+ b .Lcbc_dec_loop -+ -+.Lcbc_dec_loop_finish: -+ adds $len, #8 -+ beq .Lcbc_dec_done -+ -+ mov r5, $rounds -+ vld1.8 {@XMM[0]}, [$inp]! @ load input -+ mov r4, $keysched @ pass the key -+ cmp $len, #2 -+ blo .Lcbc_dec_one -+ vld1.8 {@XMM[1]}, [$inp]! -+ beq .Lcbc_dec_two -+ vld1.8 {@XMM[2]}, [$inp]! -+ cmp $len, #4 -+ blo .Lcbc_dec_three -+ vld1.8 {@XMM[3]}, [$inp]! -+ beq .Lcbc_dec_four -+ vld1.8 {@XMM[4]}, [$inp]! -+ cmp $len, #6 -+ blo .Lcbc_dec_five -+ vld1.8 {@XMM[5]}, [$inp]! -+ beq .Lcbc_dec_six -+ vld1.8 {@XMM[6]}, [$inp]! -+ vst1.8 {@XMM[15]}, [$iv] @ put aside IV -+ bl _bsaes_decrypt8 -+ sub $inp, #0x70 -+ vld1.8 {@XMM[14]}, [$iv] @ load IV -+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV -+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input -+ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! -+ veor @XMM[1], @XMM[1], @XMM[8] -+ veor @XMM[6], @XMM[6], @XMM[9] -+ vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! -+ veor @XMM[4], @XMM[4], @XMM[10] -+ veor @XMM[2], @XMM[2], @XMM[11] -+ vld1.8 {@XMM[15]}, [$inp]! -+ veor @XMM[7], @XMM[7], @XMM[12] -+ veor @XMM[3], @XMM[3], @XMM[13] -+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output -+ vst1.8 {@XMM[6]}, [$out]! -+ vst1.8 {@XMM[4]}, [$out]! -+ vst1.8 {@XMM[2]}, [$out]! -+ vst1.8 {@XMM[7]}, [$out]! -+ vst1.8 {@XMM[3]}, [$out]! -+ b .Lcbc_dec_done -+.align 4 -+.Lcbc_dec_six: -+ sub $inp, #0x60 -+ vst1.8 {@XMM[15]}, [$iv] @ put aside IV -+ bl _bsaes_decrypt8 -+ vld1.8 {@XMM[14]}, [$iv] @ load IV -+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV -+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input -+ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! -+ veor @XMM[1], @XMM[1], @XMM[8] -+ veor @XMM[6], @XMM[6], @XMM[9] -+ vld1.8 {@XMM[12]}, [$inp]! -+ veor @XMM[4], @XMM[4], @XMM[10] -+ veor @XMM[2], @XMM[2], @XMM[11] -+ vld1.8 {@XMM[15]}, [$inp]! -+ veor @XMM[7], @XMM[7], @XMM[12] -+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output -+ vst1.8 {@XMM[6]}, [$out]! -+ vst1.8 {@XMM[4]}, [$out]! -+ vst1.8 {@XMM[2]}, [$out]! -+ vst1.8 {@XMM[7]}, [$out]! -+ b .Lcbc_dec_done -+.align 4 -+.Lcbc_dec_five: -+ sub $inp, #0x50 -+ vst1.8 {@XMM[15]}, [$iv] @ put aside IV -+ bl _bsaes_decrypt8 -+ vld1.8 {@XMM[14]}, [$iv] @ load IV -+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV -+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input -+ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! -+ veor @XMM[1], @XMM[1], @XMM[8] -+ veor @XMM[6], @XMM[6], @XMM[9] -+ vld1.8 {@XMM[15]}, [$inp]! -+ veor @XMM[4], @XMM[4], @XMM[10] -+ veor @XMM[2], @XMM[2], @XMM[11] -+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output -+ vst1.8 {@XMM[6]}, [$out]! -+ vst1.8 {@XMM[4]}, [$out]! -+ vst1.8 {@XMM[2]}, [$out]! -+ b .Lcbc_dec_done -+.align 4 -+.Lcbc_dec_four: -+ sub $inp, #0x40 -+ vst1.8 {@XMM[15]}, [$iv] @ put aside IV -+ bl _bsaes_decrypt8 -+ vld1.8 {@XMM[14]}, [$iv] @ load IV -+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV -+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input -+ vld1.8 {@XMM[10]}, [$inp]! -+ veor @XMM[1], @XMM[1], @XMM[8] -+ veor @XMM[6], @XMM[6], @XMM[9] -+ vld1.8 {@XMM[15]}, [$inp]! -+ veor @XMM[4], @XMM[4], @XMM[10] -+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output -+ vst1.8 {@XMM[6]}, [$out]! -+ vst1.8 {@XMM[4]}, [$out]! -+ b .Lcbc_dec_done -+.align 4 -+.Lcbc_dec_three: -+ sub $inp, #0x30 -+ vst1.8 {@XMM[15]}, [$iv] @ put aside IV -+ bl _bsaes_decrypt8 -+ vld1.8 {@XMM[14]}, [$iv] @ load IV -+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV -+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ reload input -+ vld1.8 {@XMM[15]}, [$inp]! -+ veor @XMM[1], @XMM[1], @XMM[8] -+ veor @XMM[6], @XMM[6], @XMM[9] -+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output -+ vst1.8 {@XMM[6]}, [$out]! -+ b .Lcbc_dec_done -+.align 4 -+.Lcbc_dec_two: -+ sub $inp, #0x20 -+ vst1.8 {@XMM[15]}, [$iv] @ put aside IV -+ bl _bsaes_decrypt8 -+ vld1.8 {@XMM[14]}, [$iv] @ load IV -+ veor @XMM[0], @XMM[0], @XMM[14] @ ^= IV -+ vld1.8 {@XMM[8]}, [$inp]! @ reload input -+ veor @XMM[1], @XMM[1], @XMM[8] -+ vld1.8 {@XMM[15]}, [$inp]! @ reload input -+ vst1.8 {@XMM[0]-@XMM[1]}, [$out]! @ write output -+ b .Lcbc_dec_done -+.align 4 -+.Lcbc_dec_one: -+ sub $inp, #0x10 -+ mov r7, $out @ save original out pointer -+ mov $out, $iv @ use the iv scratch space as out buffer -+ mov r2, $key -+ bl AES_decrypt -+ vld1.8 {@XMM[0]}, [$iv] @ reload output -+ veor @XMM[15], @XMM[15], @XMM[0] @ ^= IV -+ vst1.8 {@XMM[15]}, [r7] -+ vmov @XMM[15], @XMM[0] @ IV -+ -+.Lcbc_dec_done: -+ vmov.i32 q0, #0 -+.Lcbc_dec_bzero: @ wipe key schedule [if any] -+ vst1.8 {q0}, [$keysched]! -+ teq $keysched, r10 -+ bne .Lcbc_dec_bzero -+ -+ add sp, #0x10 -+ ldr $iv, [sp, #0x60] -+ vst1.8 {@XMM[15]}, [$iv] @ return IV -+ vldmia sp!, {d8-d15} -+ ldmia sp!, {r4-r10, pc} -+ -+ .size bsaes_cbc_encrypt,.-bsaes_cbc_encrypt -+___ -+} -+{ -+my ($inp,$out,$len,$key,$keysched,$const,$rounds,$ctr)=("r0","r1","r2","r3","sp","r6","r7","r8"); -+ -+$code.=<<___; -+ .align 5 -+ @ byte-swap constants -+.LSWP: -+ .quad 0x0405060703020100 -+.LSWPUPM0SR: -+ .quad 0x0a0d02060c03070b, 0x0004080f05090e01 -+.LADD: -+ .quad 0x0807060504030201 -+ -+ .extern AES_encrypt -+ .global bsaes_ctr32_encrypt_blocks -+ .type bsaes_ctr32_encrypt_blocks,%function -+bsaes_ctr32_encrypt_blocks: -+ cmp $len, #8 @ use plain AES for -+ blo .Lctr_enc_short @ small sizes -+ -+ stmdb sp!, {r4-r8, r10-r11, lr} -+ vstmdb sp!, {d8-d15} @ ABI specification says so -+ ldr $ctr, [sp, #0x60] @ ctr is 1st arg on the stack -+ sub sp, #0x10 @ scratch space to carry over the ctr -+ mov r10, sp @ save sp -+ -+ @ allocate the key schedule on the stack -+ ldr $rounds, [r3, #240] @ get # of rounds -+ sub r14, sp, $rounds, lsl #7 @ 128 bytes per inner round key -+ add sp, r14, #`128-32` @ size of bit-sliced key schedule -+ -+ @ populate the key schedule -+ mov r4, $key @ pass key -+ mov r5, $rounds @ pass # of rounds -+ mov r12, $keysched @ pass key schedule -+ bl _bsaes_key_convert -+ veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key -+ vstmia r12, {@XMM[7]} @ save last round key -+ -+ vldm $ctr, {@XMM[0]} @ load counter -+ mov $ctr, r10 -+ -+ vldm $keysched, {@XMM[4]} @ load round0 key -+ -+ vldr `&Dlo(@XMM[8])`, .LSWP @ byte swap upper part -+ vtbl.8 `&Dhi(@XMM[0])`, {`&Dhi(@XMM[0])`}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[4])`, {`&Dhi(@XMM[4])`}, `&Dlo(@XMM[8])` -+ -+ vstm $keysched, {@XMM[4]} @ save adjusted round0 key -+ -+ b .Lctr_enc_loop -+ -+ .align 5 -+.Lctr_enc_loop: -+ -+ @ set up the addition constants -+ vldr `&Dlo(@XMM[11])`, .LADD -+ vmov.i8 `&Dhi(@XMM[11])`, #0 -+ vmov.i8 @XMM[12], #0 -+ vzip.8 `&Dlo(@XMM[11])`, `&Dhi(@XMM[11])` -+ vzip.16 @XMM[11], @XMM[12] -+ -+ @ get 8 counter values in regs and do the add -+ vdup.32 @XMM[4], `&Dhi(@XMM[0])`[1] -+ vdup.32 @XMM[9], `&Dhi(@XMM[0])`[1] -+ vadd.u32 @XMM[4], @XMM[11] -+ vadd.u32 @XMM[9], @XMM[12] -+ vdup.32 @XMM[2], `&Dhi(@XMM[0])`[0] -+ vdup.32 @XMM[6], `&Dhi(@XMM[0])`[0] -+ vzip.32 @XMM[2], @XMM[4] -+ vzip.32 @XMM[6], @XMM[9] -+ -+ vmov `&Dhi(@XMM[1])`, `&Dlo(@XMM[0])` -+ vmov `&Dlo(@XMM[1])`, `&Dlo(@XMM[0])` -+ vmov @XMM[3], @XMM[1] -+ vmov @XMM[5], @XMM[1] -+ vmov @XMM[7], @XMM[1] -+ -+ vswp `&Dhi(@XMM[1])`, `&Dlo(@XMM[2])` -+ vswp `&Dhi(@XMM[3])`, `&Dlo(@XMM[4])` -+ vswp `&Dhi(@XMM[5])`, `&Dlo(@XMM[6])` -+ vswp `&Dhi(@XMM[7])`, `&Dlo(@XMM[9])` -+ -+ vstm $ctr, {@XMM[9]} @ save counter -+ -+ @ Borrow prologue from _bsaes_encrypt8 to use the opportunity -+ @ to flip byte order in 32-bit counter -+ -+ adr r11, .LSWPUPM0SR -+ vld1.8 {@XMM[8]}, [r11]! @ .LSWPUPM0SR -+ adrl $const,.LSR -+ vld1.8 {@XMM[9]}, [$keysched] @ load round0 key -+ mov r5, $rounds @ pass rounds -+ add r4, $keysched, #0x10 @ pass next round key -+ veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key -+ veor @XMM[11], @XMM[1], @XMM[9] -+ vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])` -+ veor @XMM[12], @XMM[2], @XMM[9] -+ vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])` -+ veor @XMM[13], @XMM[3], @XMM[9] -+ vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])` -+ veor @XMM[14], @XMM[4], @XMM[9] -+ vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])` -+ veor @XMM[15], @XMM[5], @XMM[9] -+ vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])` -+ veor @XMM[10], @XMM[6], @XMM[9] -+ vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])` -+ veor @XMM[11], @XMM[7], @XMM[9] -+ vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])` -+ vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])` -+ vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])` -+ -+ bl _bsaes_encrypt8_bitslice -+ -+ subs $len, #8 -+ blo .Lctr_enc_loop_done -+ -+ vldmia $inp!, {@XMM[8]-@XMM[15]} @ load input -+ -+ veor @XMM[8], @XMM[0] -+ veor @XMM[1], @XMM[9] -+ vst1.8 {@XMM[8]}, [$out]! -+ veor @XMM[4], @XMM[10] -+ vst1.8 {@XMM[1]}, [$out]! -+ veor @XMM[6], @XMM[11] -+ vst1.8 {@XMM[4]}, [$out]! -+ veor @XMM[3], @XMM[12] -+ vst1.8 {@XMM[6]}, [$out]! -+ veor @XMM[7], @XMM[13] -+ vst1.8 {@XMM[3]}, [$out]! -+ veor @XMM[2], @XMM[14] -+ vst1.8 {@XMM[7]}, [$out]! -+ veor @XMM[5], @XMM[15] -+ vst1.8 {@XMM[2]}, [$out]! -+ vst1.8 {@XMM[5]}, [$out]! -+ -+ vldm $ctr, {@XMM[0]} @ load counter -+ -+ bne .Lctr_enc_loop -+ b .Lctr_enc_done -+ -+.Lctr_enc_loop_done: -+ add $len, #8 -+ vld1.8 {@XMM[8]}, [$inp]! @ load input -+ veor @XMM[0], @XMM[8] -+ vst1.8 {@XMM[0]}, [$out]! @ write output -+ cmp $len, #2 -+ blo .Lctr_enc_done -+ vld1.8 {@XMM[9]}, [$inp]! -+ veor @XMM[1], @XMM[9] -+ vst1.8 {@XMM[1]}, [$out]! -+ beq .Lctr_enc_done -+ vld1.8 {@XMM[10]}, [$inp]! -+ veor @XMM[4], @XMM[10] -+ vst1.8 {@XMM[4]}, [$out]! -+ cmp $len, #4 -+ blo .Lctr_enc_done -+ vld1.8 {@XMM[11]}, [$inp]! -+ veor @XMM[6], @XMM[11] -+ vst1.8 {@XMM[6]}, [$out]! -+ beq .Lctr_enc_done -+ vld1.8 {@XMM[12]}, [$inp]! -+ veor @XMM[3], @XMM[12] -+ vst1.8 {@XMM[3]}, [$out]! -+ cmp $len, #6 -+ blo .Lctr_enc_done -+ vld1.8 {@XMM[13]}, [$inp]! -+ veor @XMM[7], @XMM[13] -+ vst1.8 {@XMM[7]}, [$out]! -+ beq .Lctr_enc_done -+ vld1.8 {@XMM[14]}, [$inp] -+ veor @XMM[2], @XMM[14] -+ vst1.8 {@XMM[2]}, [$out]! -+ -+.Lctr_enc_done: -+ vmov.i32 q0, #0 -+.Lctr_enc_bzero: @ wipe key schedule [if any] -+ vst1.8 {q0}, [$keysched]! -+ teq $keysched, r10 -+ bne .Lctr_enc_bzero -+ -+ add sp, r10, #0x10 -+ vldmia sp!, {d8-d15} -+ ldmia sp!, {r4-r8, r10-r11, pc} -+ -+.Lctr_enc_short: -+ ldr ip, [sp] @ ctr pointer is passed on stack -+ stmdb sp!, {r0-r6, lr} @ stack regs as usual -+ -+ ldm sp, {r4-r6} @ copy r0-2 to r4-6 -+ vldmia ip, {d0-d1} @ load the counter from [arg5] -+ vstmdb sp!, {d0-d1} @ copy of ctr to top of stack -+ sub sp, #0x10 -+ -+.Lctr_enc_short_loop: -+ add r0, sp, #0x10 -+ mov r1, sp @ put output on the stack -+ ldr r2, [sp, #0x2c] @ stacked r3 -+ -+ bl AES_encrypt -+ -+ vldmia r4!, {@XMM[1]} @ load input -+ ldr r0, [sp, #0x1c] @ load LSW of counter (BE) -+#ifdef __ARMEL__ -+ rev r0, r0 @ need to increment the counter -+ add r0, #1 @ in BE mode -+ rev r0, r0 -+#else -+ add r0, #1 -+#endif -+ vldm sp, {@XMM[0]} -+ str r0, [sp, #0x1c] -+ veor @XMM[0], @XMM[1] -+ subs r6, #1 -+ vstmia r5!, {@XMM[0]} -+ bne .Lctr_enc_short_loop -+ -+ add sp, #0x30 -+ ldmia sp!, {r4-r6, pc} -+ -+ .size bsaes_ctr32_encrypt_blocks,.-bsaes_ctr32_encrypt_blocks -+___ -+} -+$code =~ s/\`([^\`]*)\`/eval($1)/gem; -+ -+print $code; -+ -+close STDOUT; ---- a/crypto/evp/e_aes.c -+++ b/crypto/evp/e_aes.c -@@ -482,6 +482,12 @@ static const EVP_CIPHER aes_##keylen##_# - NULL,NULL,aes_##mode##_ctrl,NULL }; \ - const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \ - { return &aes_##keylen##_##mode; } -+ -+#endif -+ -+#if defined(AES_ASM) && defined(BSAES_ASM) && (defined(__arm__) || defined(__arm)) -+#include "arm_arch.h" -+#define BSAES_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON) - #endif - - #define BLOCK_CIPHER_generic_pack(nid,keylen,flags) \ -@@ -1064,11 +1070,13 @@ static int aes_xts_init_key(EVP_CIPHER_C - xctx->stream = NULL; - #endif - /* key_len is two AES keys */ -+#if !(defined(__arm__) || defined(__arm)) - #ifdef BSAES_CAPABLE - if (BSAES_CAPABLE) - xctx->stream = enc ? bsaes_xts_encrypt : bsaes_xts_decrypt; - else - #endif -+#endif - #ifdef VPAES_CAPABLE - if (VPAES_CAPABLE) - { diff --git a/debian/patches/old/0002-bsaes-armv7.pl-Big-endian-fixes.patch b/debian/patches/old/0002-bsaes-armv7.pl-Big-endian-fixes.patch deleted file mode 100644 index 23fb94a..0000000 --- a/debian/patches/old/0002-bsaes-armv7.pl-Big-endian-fixes.patch +++ /dev/null @@ -1,216 +0,0 @@ -From 719e0b800e3737f3a19251a097ff911744ed7a9e Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ard.biesheuvel@linaro.org> -Date: Mon, 15 Apr 2013 13:54:13 +0200 -Subject: [PATCH 2/3] bsaes-armv7.pl: Big endian fixes - -Updated the code to be (more) endian neutral, however, as it is -still untested on big endian, it is only enabled for little endian -at the moment. ---- - crypto/aes/asm/bsaes-armv7.pl | 121 ++++++++++++++---------------------------- - crypto/evp/e_aes.c | 3 - - 2 files changed, 45 insertions(+), 79 deletions(-) - ---- a/crypto/aes/asm/bsaes-armv7.pl -+++ b/crypto/aes/asm/bsaes-armv7.pl -@@ -1196,8 +1196,9 @@ bsaes_cbc_encrypt: - - .Lcbc_dec_done: - vmov.i32 q0, #0 -+ vmov.i32 q1, #0 - .Lcbc_dec_bzero: @ wipe key schedule [if any] -- vst1.8 {q0}, [$keysched]! -+ vstm $keysched!, {q0-q1} - teq $keysched, r10 - bne .Lcbc_dec_bzero - -@@ -1215,13 +1216,9 @@ my ($inp,$out,$len,$key,$keysched,$const - - $code.=<<___; - .align 5 -- @ byte-swap constants --.LSWP: -- .quad 0x0405060703020100 --.LSWPUPM0SR: -- .quad 0x0a0d02060c03070b, 0x0004080f05090e01 -+ - .LADD: -- .quad 0x0807060504030201 -+ .long 1,2,3,4,5,6,7,0 - - .extern AES_encrypt - .global bsaes_ctr32_encrypt_blocks -@@ -1233,7 +1230,7 @@ bsaes_ctr32_encrypt_blocks: - stmdb sp!, {r4-r8, r10-r11, lr} - vstmdb sp!, {d8-d15} @ ABI specification says so - ldr $ctr, [sp, #0x60] @ ctr is 1st arg on the stack -- sub sp, #0x10 @ scratch space to carry over the ctr -+ sub sp, #0x20 @ scratch space to carry over the ctr - mov r10, sp @ save sp - - @ allocate the key schedule on the stack -@@ -1249,92 +1246,61 @@ bsaes_ctr32_encrypt_blocks: - veor @XMM[7],@XMM[7],@XMM[15] @ fix up last round key - vstmia r12, {@XMM[7]} @ save last round key - -- vldm $ctr, {@XMM[0]} @ load counter -- mov $ctr, r10 -- -- vldm $keysched, {@XMM[4]} @ load round0 key -- -- vldr `&Dlo(@XMM[8])`, .LSWP @ byte swap upper part -- vtbl.8 `&Dhi(@XMM[0])`, {`&Dhi(@XMM[0])`}, `&Dlo(@XMM[8])` -- vtbl.8 `&Dhi(@XMM[4])`, {`&Dhi(@XMM[4])`}, `&Dlo(@XMM[8])` -- -- vstm $keysched, {@XMM[4]} @ save adjusted round0 key -- -+ @ copy the invariant bits of the ctr -+ ldm $ctr, {r4-r5, r11} -+ mov r12, r11 -+ stm r10!, {r4-r5} -+ stm r10!, {r4-r5} -+ stm r10!, {r11-r12} -+ stm r10!, {r11-r12} -+ sub r10, #0x20 -+ -+ ldr r11, [$ctr, #0xc] @ get LSW of BE ctr -+#ifdef __ARMEL__ -+ rev r11, r11 -+#endif - b .Lctr_enc_loop - - .align 5 - .Lctr_enc_loop: - -- @ set up the addition constants -- vldr `&Dlo(@XMM[11])`, .LADD -- vmov.i8 `&Dhi(@XMM[11])`, #0 -- vmov.i8 @XMM[12], #0 -- vzip.8 `&Dlo(@XMM[11])`, `&Dhi(@XMM[11])` -- vzip.16 @XMM[11], @XMM[12] -- - @ get 8 counter values in regs and do the add -- vdup.32 @XMM[4], `&Dhi(@XMM[0])`[1] -- vdup.32 @XMM[9], `&Dhi(@XMM[0])`[1] -+ adr r4, .LADD -+ vdup.32 @XMM[4], r11 -+ vldm r4, {@XMM[11]-@XMM[12]} -+ vmov @XMM[0], @XMM[4] - vadd.u32 @XMM[4], @XMM[11] -- vadd.u32 @XMM[9], @XMM[12] -- vdup.32 @XMM[2], `&Dhi(@XMM[0])`[0] -- vdup.32 @XMM[6], `&Dhi(@XMM[0])`[0] -+ vadd.u32 @XMM[0], @XMM[12] -+#ifdef __ARMEL__ -+ vrev32.8 @XMM[4], @XMM[4] -+ vrev32.8 @XMM[0], @XMM[0] -+#endif -+ vld1.8 {@XMM[1]-@XMM[2]}, [r10] -+ vld1.8 {@XMM[5]-@XMM[6]}, [r10] - vzip.32 @XMM[2], @XMM[4] -- vzip.32 @XMM[6], @XMM[9] -- -- vmov `&Dhi(@XMM[1])`, `&Dlo(@XMM[0])` -- vmov `&Dlo(@XMM[1])`, `&Dlo(@XMM[0])` - vmov @XMM[3], @XMM[1] -- vmov @XMM[5], @XMM[1] -- vmov @XMM[7], @XMM[1] -+ vzip.32 @XMM[6], @XMM[0] -+ vmov @XMM[7], @XMM[5] - - vswp `&Dhi(@XMM[1])`, `&Dlo(@XMM[2])` - vswp `&Dhi(@XMM[3])`, `&Dlo(@XMM[4])` - vswp `&Dhi(@XMM[5])`, `&Dlo(@XMM[6])` -- vswp `&Dhi(@XMM[7])`, `&Dlo(@XMM[9])` -- -- vstm $ctr, {@XMM[9]} @ save counter -- -- @ Borrow prologue from _bsaes_encrypt8 to use the opportunity -- @ to flip byte order in 32-bit counter -+ vswp `&Dhi(@XMM[7])`, `&Dlo(@XMM[0])` - -- adr r11, .LSWPUPM0SR -- vld1.8 {@XMM[8]}, [r11]! @ .LSWPUPM0SR -- adrl $const,.LSR -- vld1.8 {@XMM[9]}, [$keysched] @ load round0 key -+ mov r4, $keysched @ pass round key - mov r5, $rounds @ pass rounds -- add r4, $keysched, #0x10 @ pass next round key -- veor @XMM[10], @XMM[0], @XMM[9] @ xor with round0 key -- veor @XMM[11], @XMM[1], @XMM[9] -- vtbl.8 `&Dlo(@XMM[0])`, {@XMM[10]}, `&Dlo(@XMM[8])` -- vtbl.8 `&Dhi(@XMM[0])`, {@XMM[10]}, `&Dhi(@XMM[8])` -- veor @XMM[12], @XMM[2], @XMM[9] -- vtbl.8 `&Dlo(@XMM[1])`, {@XMM[11]}, `&Dlo(@XMM[8])` -- vtbl.8 `&Dhi(@XMM[1])`, {@XMM[11]}, `&Dhi(@XMM[8])` -- veor @XMM[13], @XMM[3], @XMM[9] -- vtbl.8 `&Dlo(@XMM[2])`, {@XMM[12]}, `&Dlo(@XMM[8])` -- vtbl.8 `&Dhi(@XMM[2])`, {@XMM[12]}, `&Dhi(@XMM[8])` -- veor @XMM[14], @XMM[4], @XMM[9] -- vtbl.8 `&Dlo(@XMM[3])`, {@XMM[13]}, `&Dlo(@XMM[8])` -- vtbl.8 `&Dhi(@XMM[3])`, {@XMM[13]}, `&Dhi(@XMM[8])` -- veor @XMM[15], @XMM[5], @XMM[9] -- vtbl.8 `&Dlo(@XMM[4])`, {@XMM[14]}, `&Dlo(@XMM[8])` -- vtbl.8 `&Dhi(@XMM[4])`, {@XMM[14]}, `&Dhi(@XMM[8])` -- veor @XMM[10], @XMM[6], @XMM[9] -- vtbl.8 `&Dlo(@XMM[5])`, {@XMM[15]}, `&Dlo(@XMM[8])` -- vtbl.8 `&Dhi(@XMM[5])`, {@XMM[15]}, `&Dhi(@XMM[8])` -- veor @XMM[11], @XMM[7], @XMM[9] -- vtbl.8 `&Dlo(@XMM[6])`, {@XMM[10]}, `&Dlo(@XMM[8])` -- vtbl.8 `&Dhi(@XMM[6])`, {@XMM[10]}, `&Dhi(@XMM[8])` -- vtbl.8 `&Dlo(@XMM[7])`, {@XMM[11]}, `&Dlo(@XMM[8])` -- vtbl.8 `&Dhi(@XMM[7])`, {@XMM[11]}, `&Dhi(@XMM[8])` - -- bl _bsaes_encrypt8_bitslice -+ bl _bsaes_encrypt8 - - subs $len, #8 -+ add r11, #8 -+ - blo .Lctr_enc_loop_done - -- vldmia $inp!, {@XMM[8]-@XMM[15]} @ load input -+ vld1.8 {@XMM[8]-@XMM[9]}, [$inp]! @ load input -+ vld1.8 {@XMM[10]-@XMM[11]}, [$inp]! -+ vld1.8 {@XMM[12]-@XMM[13]}, [$inp]! -+ vld1.8 {@XMM[14]-@XMM[15]}, [$inp]! - - veor @XMM[8], @XMM[0] - veor @XMM[1], @XMM[9] -@@ -1353,8 +1319,6 @@ bsaes_ctr32_encrypt_blocks: - vst1.8 {@XMM[2]}, [$out]! - vst1.8 {@XMM[5]}, [$out]! - -- vldm $ctr, {@XMM[0]} @ load counter -- - bne .Lctr_enc_loop - b .Lctr_enc_done - -@@ -1393,12 +1357,13 @@ bsaes_ctr32_encrypt_blocks: - - .Lctr_enc_done: - vmov.i32 q0, #0 -+ vmov.i32 q1, #0 - .Lctr_enc_bzero: @ wipe key schedule [if any] -- vst1.8 {q0}, [$keysched]! -+ vstm $keysched!, {q0-q1} - teq $keysched, r10 - bne .Lctr_enc_bzero - -- add sp, r10, #0x10 -+ add sp, r10, #0x20 - vldmia sp!, {d8-d15} - ldmia sp!, {r4-r8, r10-r11, pc} - ---- a/crypto/evp/e_aes.c -+++ b/crypto/evp/e_aes.c -@@ -485,7 +485,8 @@ const EVP_CIPHER *EVP_aes_##keylen##_##m - - #endif - --#if defined(AES_ASM) && defined(BSAES_ASM) && (defined(__arm__) || defined(__arm)) -+#if defined(AES_ASM) && defined(BSAES_ASM) && (defined(__arm__) || defined(__arm)) \ -+ && defined(__ARMEL__) - #include "arm_arch.h" - #define BSAES_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON) - #endif diff --git a/debian/patches/old/0003-bsaes-armv7.pl-avoid-bit-sliced-AES-CBC-for-block-si.patch b/debian/patches/old/0003-bsaes-armv7.pl-avoid-bit-sliced-AES-CBC-for-block-si.patch deleted file mode 100644 index 4d2235d..0000000 --- a/debian/patches/old/0003-bsaes-armv7.pl-avoid-bit-sliced-AES-CBC-for-block-si.patch +++ /dev/null @@ -1,24 +0,0 @@ -From a2f9535dd2b0d2e230f978aa3eaf103f5224b6d5 Mon Sep 17 00:00:00 2001 -From: Ard Biesheuvel <ard.biesheuvel@linaro.org> -Date: Mon, 15 Apr 2013 14:32:59 +0200 -Subject: [PATCH 3/3] bsaes-armv7.pl: avoid bit-sliced AES/CBC for block sizes - < 1k - -Avoid using bit sliced AES for CBC decryption when the block size -is smaller than 1k. The reason is that the overhead of creating the -key schedule is larger than the obtained speedup on Cortex-A9. ---- - crypto/aes/asm/bsaes-armv7.pl | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - ---- a/crypto/aes/asm/bsaes-armv7.pl -+++ b/crypto/aes/asm/bsaes-armv7.pl -@@ -985,7 +985,7 @@ $code.=<<___; - .global bsaes_cbc_encrypt - .type bsaes_cbc_encrypt,%function - bsaes_cbc_encrypt: -- cmp $len, #128 -+ cmp $len, #1024 - blo AES_cbc_encrypt - - @ it is up to the caller to make sure we are called with enc == 0 diff --git a/debian/patches/pic.patch b/debian/patches/pic.patch index ed95be4..bf63614 100644 --- a/debian/patches/pic.patch +++ b/debian/patches/pic.patch @@ -1,9 +1,9 @@ --- crypto/des/asm/desboth.pl | 17 ++++++++++++++--- crypto/perlasm/cbc.pl | 24 ++++++++++++++++++++---- - crypto/perlasm/x86gas.pl | 11 +++++++++++ + crypto/perlasm/x86gas.pl | 16 ++++++++++++++++ crypto/x86cpuid.pl | 10 +++++----- - 4 files changed, 50 insertions(+), 12 deletions(-) + 4 files changed, 55 insertions(+), 12 deletions(-) --- a/crypto/des/asm/desboth.pl +++ b/crypto/des/asm/desboth.pl @@ -108,11 +108,13 @@ } push(@out,$initseg) if ($initseg); } -@@ -218,7 +219,17 @@ ___ +@@ -218,8 +219,23 @@ ___ elsif ($::elf) { $initseg.=<<___; .section .init -+#ifdef OPENSSL_PIC ++___ ++ if ($::pic) ++ { $initseg.=<<___; + pushl %ebx + call .pic_point0 +.pic_point0: @@ -120,12 +122,16 @@ + addl \$_GLOBAL_OFFSET_TABLE_+[.-.pic_point0],%ebx + call $f\@PLT + popl %ebx -+#else ++___ ++ } ++ else ++ { $initseg.=<<___; call $f -+#endif ___ ++ } } elsif ($::coff) + { $initseg.=<<___; # applies to both Cygwin and Mingw --- a/crypto/x86cpuid.pl +++ b/crypto/x86cpuid.pl @@ -8,6 +8,8 @@ require "x86asm.pl"; diff --git a/debian/patches/series b/debian/patches/series index a2845d0..040e3df 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -35,7 +35,9 @@ default_bits.patch perlpath-quilt.patch tls12_workarounds.patch ubuntu_deb676533_arm_asm.patch +arm64-support CVE-2013-0166.patch +# Disabled for now, as causes regression on AES-NI CVE-2013-0169.patch fix_key_decoding_deadlock.patch diff --git a/debian/patches/ubuntu_deb676533_arm_asm.patch b/debian/patches/ubuntu_deb676533_arm_asm.patch index 9325394..a484bec 100644 --- a/debian/patches/ubuntu_deb676533_arm_asm.patch +++ b/debian/patches/ubuntu_deb676533_arm_asm.patch @@ -10,7 +10,7 @@ Bug-Ubuntu: https://bugs.launchpad.net/ubuntu/+source/openssl/+bug/1083498 --- a/Configure +++ b/Configure @@ -346,9 +346,8 @@ my %table=( - "debian-alpha","gcc:-DTERMIO $debian_cflag::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", + "debian-alpha","gcc:-DTERMIO ${debian_cflags}::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "debian-alpha-ev4","gcc:-DTERMIO ${debian_cflags} -mcpu=ev4::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", "debian-alpha-ev5","gcc:-DTERMIO ${debian_cflags} -mcpu=ev5::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHUNK DES_RISC1 DES_UNROLL:${alpha_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", -"debian-armeb","gcc:-DB_ENDIAN -DTERMIO ${debian_cflags}::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)", |